From 5777fe13577332b304c89bb8bf4975453e63d65a Mon Sep 17 00:00:00 2001 From: Mehdi Dogguy <mehdi@debian.org> Date: Mon, 8 Sep 2014 21:31:41 +0200 Subject: [PATCH] Imported Upstream version 1.3.7 --- AUTHORS | 1 + META | 6 +- NEWS | 69 +- auxdir/x_ac_gtk.m4 | 6 +- configure | 13 +- configure.ac | 5 +- contribs/Makefile.am | 2 +- contribs/Makefile.in | 2 +- contribs/README | 5 + contribs/ptrace.patch | 5 +- contribs/python/Makefile.am | 1 + contribs/python/Makefile.in | 563 +++++++++++++ contribs/python/hostlist/COPYING | 339 ++++++++ contribs/python/hostlist/Makefile.am | 8 + contribs/python/hostlist/Makefile.in | 571 +++++++++++++ contribs/python/hostlist/PKG-INFO | 10 + contribs/python/hostlist/README | 43 + contribs/python/hostlist/hostlist.py | 426 ++++++++++ contribs/python/hostlist/setup.py | 14 + contribs/python/hostlist/test/Makefile.am | 2 + contribs/python/hostlist/test/Makefile.in | 407 ++++++++++ .../python/hostlist/test/test_hostlist.py | 123 +++ doc/html/accounting.shtml | 22 +- doc/html/configurator.html.in | 6 +- doc/html/download.shtml | 33 +- doc/html/faq.shtml | 38 +- doc/html/team.shtml | 6 +- doc/html/testimonials.shtml | 12 +- doc/man/man1/sacctmgr.1 | 31 +- doc/man/man1/scontrol.1 | 11 +- doc/man/man1/sreport.1 | 6 +- doc/man/man3/slurm_get_errno.3 | 2 +- doc/man/man3/slurm_reconfigure.3 | 10 +- doc/man/man5/slurm.conf.5 | 16 +- slurm.spec | 19 +- slurm/slurm.h.in | 8 +- src/api/job_info.c | 20 +- src/api/reconfigure.c | 14 +- src/api/signal.c | 4 +- src/api/step_launch.c | 22 +- src/common/assoc_mgr.c | 39 +- src/common/env.c | 3 - src/common/pack.h | 6 +- src/common/parse_config.c | 5 +- src/common/parse_time.c | 6 + src/common/plugstack.c | 27 +- src/common/print_fields.c | 75 +- src/common/print_fields.h | 20 +- src/common/read_config.c | 71 +- src/common/read_config.h | 16 - src/common/slurm_accounting_storage.c | 341 ++++---- src/common/slurm_accounting_storage.h | 5 + src/common/slurm_cred.c | 4 +- src/common/slurm_protocol_api.c | 53 
+- src/common/slurm_protocol_api.h | 9 +- src/common/slurm_protocol_defs.c | 1 + src/common/slurm_protocol_defs.h | 9 +- src/common/slurm_protocol_pack.c | 14 +- src/common/slurmdbd_defs.c | 353 ++++++--- src/common/slurmdbd_defs.h | 3 +- src/common/uid.c | 128 ++- src/common/uid.h | 19 +- src/database/gold_interface.c | 6 +- .../filetxt/accounting_storage_filetxt.c | 7 +- .../filetxt/filetxt_jobacct_process.c | 44 +- .../gold/accounting_storage_gold.c | 24 +- .../mysql/accounting_storage_mysql.c | 569 ++++++++----- .../mysql/mysql_jobacct_process.c | 2 +- .../slurmdbd/accounting_storage_slurmdbd.c | 7 +- src/plugins/auth/munge/auth_munge.c | 3 +- src/plugins/jobcomp/filetxt/jobcomp_filetxt.c | 41 +- src/plugins/jobcomp/mysql/jobcomp_mysql.c | 25 +- src/plugins/jobcomp/pgsql/jobcomp_pgsql.c | 25 +- src/plugins/sched/backfill/backfill_wrapper.c | 2 +- src/plugins/sched/wiki/get_jobs.c | 48 +- src/plugins/sched/wiki/msg.c | 12 +- src/plugins/sched/wiki2/get_jobs.c | 20 +- src/plugins/sched/wiki2/msg.c | 12 +- .../select/bluegene/plugin/bg_job_place.c | 34 +- .../select/bluegene/plugin/bg_job_run.c | 10 +- .../bluegene/plugin/bg_record_functions.c | 20 +- .../select/bluegene/plugin/block_sys.c | 25 +- .../select/bluegene/plugin/select_bluegene.c | 13 +- src/plugins/select/cons_res/select_cons_res.c | 12 +- src/sacct/options.c | 4 + src/sacct/print.c | 2 +- src/sacct/sacct_stat.c | 2 +- src/sacctmgr/account_functions.c | 190 ++++- src/sacctmgr/association_functions.c | 14 +- src/sacctmgr/cluster_functions.c | 36 +- src/sacctmgr/common.c | 46 +- src/sacctmgr/file_functions.c | 337 ++++---- src/sacctmgr/sacctmgr.c | 83 +- src/sacctmgr/sacctmgr.h | 6 +- src/sacctmgr/txn_functions.c | 3 +- src/sacctmgr/user_functions.c | 243 ++++-- src/salloc/salloc.c | 13 +- src/sattach/sattach.c | 9 +- src/sbatch/opt.c | 25 +- src/sbatch/sbatch.c | 30 +- src/sbcast/agent.c | 2 +- src/scancel/scancel.c | 4 +- src/scontrol/scontrol.c | 33 +- src/slurmctld/agent.c | 19 +- 
src/slurmctld/controller.c | 122 ++- src/slurmctld/job_mgr.c | 114 ++- src/slurmctld/job_scheduler.c | 3 + src/slurmctld/node_mgr.c | 82 +- src/slurmctld/node_scheduler.c | 1 + src/slurmctld/partition_mgr.c | 75 +- src/slurmctld/ping_nodes.c | 42 +- src/slurmctld/proc_req.c | 23 +- src/slurmctld/read_config.c | 2 +- src/slurmctld/slurmctld.h | 39 +- src/slurmctld/step_mgr.c | 4 +- src/slurmctld/trigger_mgr.c | 7 +- src/slurmd/slurmd/req.c | 127 ++- src/slurmd/slurmstepd/mgr.c | 26 +- src/slurmd/slurmstepd/pdebug.c | 2 +- src/slurmd/slurmstepd/req.c | 49 +- src/slurmd/slurmstepd/slurmstepd_job.c | 8 +- src/slurmd/slurmstepd/slurmstepd_job.h | 3 +- src/slurmdbd/proc_req.c | 104 ++- src/slurmdbd/read_config.c | 17 +- src/smap/job_functions.c | 11 +- src/squeue/print.c | 16 +- src/squeue/sort.c | 4 + src/sreport/cluster_reports.c | 37 +- src/sreport/common.c | 14 +- src/sreport/job_reports.c | 8 +- src/sreport/sreport.c | 17 +- src/sreport/sreport.h | 2 +- src/sreport/user_reports.c | 24 +- src/srun/allocate.c | 2 +- src/srun/opt.c | 4 +- src/srun/srun.c | 102 ++- src/srun/srun_job.c | 25 +- src/sstat/sstat.c | 2 +- src/sview/job_info.c | 39 +- testsuite/expect/README | 15 +- testsuite/expect/globals | 128 ++- testsuite/expect/regression | 4 +- testsuite/expect/test1.15 | 6 +- testsuite/expect/test1.18 | 4 +- testsuite/expect/test1.19 | 8 +- testsuite/expect/test1.22 | 8 +- testsuite/expect/test1.23 | 30 +- testsuite/expect/test1.24 | 10 +- testsuite/expect/test1.27 | 2 +- testsuite/expect/test1.29 | 2 +- testsuite/expect/test1.30 | 2 +- testsuite/expect/test1.32 | 7 +- testsuite/expect/test1.34 | 87 ++ testsuite/expect/test1.34.prog.c | 33 + testsuite/expect/test1.38 | 40 +- testsuite/expect/test1.43 | 2 +- testsuite/expect/test1.44 | 9 +- testsuite/expect/test1.46 | 2 +- testsuite/expect/test1.51 | 1 + testsuite/expect/test1.52 | 4 +- testsuite/expect/test1.81 | 6 +- testsuite/expect/test1.82 | 6 +- testsuite/expect/test1.83 | 31 +- testsuite/expect/test1.84 | 8 +- 
testsuite/expect/test1.86 | 55 +- testsuite/expect/test1.87 | 8 +- testsuite/expect/test1.89 | 4 +- testsuite/expect/test1.90 | 60 +- testsuite/expect/test1.91 | 2 +- testsuite/expect/test1.92 | 3 + testsuite/expect/test1.93 | 4 +- testsuite/expect/test10.1 | 6 +- testsuite/expect/test10.10 | 9 +- testsuite/expect/test10.11 | 5 + testsuite/expect/test10.12 | 4 + testsuite/expect/test10.13 | 7 +- testsuite/expect/test10.2 | 6 +- testsuite/expect/test10.3 | 6 +- testsuite/expect/test10.4 | 6 +- testsuite/expect/test10.5 | 6 +- testsuite/expect/test10.6 | 6 +- testsuite/expect/test10.7 | 6 +- testsuite/expect/test10.8 | 9 +- testsuite/expect/test10.9 | 5 + testsuite/expect/test11.2 | 6 +- testsuite/expect/test11.7 | 8 +- testsuite/expect/test12.2 | 24 +- testsuite/expect/test15.11 | 6 + testsuite/expect/test15.12 | 6 + testsuite/expect/test15.13 | 3 +- testsuite/expect/test15.17 | 2 +- testsuite/expect/test15.22 | 6 +- testsuite/expect/test16.4 | 2 +- testsuite/expect/test17.12 | 8 + testsuite/expect/test17.13 | 2 +- testsuite/expect/test17.15 | 2 +- testsuite/expect/test17.17 | 2 +- testsuite/expect/test17.19 | 2 +- testsuite/expect/test17.23 | 6 + testsuite/expect/test17.31 | 2 +- testsuite/expect/test19.3 | 2 +- testsuite/expect/test19.4 | 2 +- testsuite/expect/test19.5 | 2 +- testsuite/expect/test19.6 | 2 +- testsuite/expect/test19.7 | 2 +- testsuite/expect/test21.1 | 2 +- testsuite/expect/test21.10 | 244 ++++++ testsuite/expect/test21.11 | 275 +++++++ testsuite/expect/test21.12 | 305 +++++++ testsuite/expect/test21.13 | 629 +++++++++++++++ testsuite/expect/test21.14 | 750 ++++++++++++++++++ testsuite/expect/test21.15 | 291 +++++++ testsuite/expect/test21.16 | 363 +++++++++ testsuite/expect/test21.17 | 397 +++++++++ testsuite/expect/test21.18 | 397 +++++++++ testsuite/expect/test21.19 | 361 +++++++++ testsuite/expect/test21.4 | 2 +- testsuite/expect/test21.5 | 19 +- testsuite/expect/test21.6 | 16 +- testsuite/expect/test21.7 | 16 +- testsuite/expect/test21.8 | 16 
+- testsuite/expect/test21.9 | 16 +- testsuite/expect/test3.1 | 39 + testsuite/expect/test3.2 | 8 +- testsuite/expect/test3.3 | 6 +- testsuite/expect/test3.5 | 2 +- testsuite/expect/test3.9 | 1 + testsuite/expect/test4.3 | 2 +- testsuite/expect/test4.9 | 4 +- testsuite/expect/test6.10 | 4 +- testsuite/expect/test6.13 | 2 +- testsuite/expect/test6.8 | 6 + testsuite/expect/test7.1 | 10 +- testsuite/expect/test7.10 | 2 +- testsuite/expect/test7.11 | 257 ++++++ testsuite/expect/test7.11.prog.c | 120 +++ testsuite/expect/test7.3 | 13 +- testsuite/expect/test7.7 | 2 +- testsuite/expect/test7.8 | 4 +- testsuite/expect/test7.9 | 2 +- testsuite/expect/test8.7 | 2 +- testsuite/expect/test9.1 | 16 +- testsuite/expect/test9.2 | 20 +- testsuite/expect/test9.3 | 16 +- testsuite/expect/test9.4 | 24 +- testsuite/expect/test9.5 | 16 +- testsuite/expect/test9.6 | 20 +- testsuite/expect/test9.8 | 2 +- 248 files changed, 10826 insertions(+), 1969 deletions(-) create mode 100644 contribs/python/Makefile.am create mode 100644 contribs/python/Makefile.in create mode 100644 contribs/python/hostlist/COPYING create mode 100644 contribs/python/hostlist/Makefile.am create mode 100644 contribs/python/hostlist/Makefile.in create mode 100644 contribs/python/hostlist/PKG-INFO create mode 100644 contribs/python/hostlist/README create mode 100755 contribs/python/hostlist/hostlist.py create mode 100644 contribs/python/hostlist/setup.py create mode 100644 contribs/python/hostlist/test/Makefile.am create mode 100644 contribs/python/hostlist/test/Makefile.in create mode 100644 contribs/python/hostlist/test/test_hostlist.py create mode 100755 testsuite/expect/test1.34 create mode 100644 testsuite/expect/test1.34.prog.c create mode 100755 testsuite/expect/test21.10 create mode 100755 testsuite/expect/test21.11 create mode 100755 testsuite/expect/test21.12 create mode 100755 testsuite/expect/test21.13 create mode 100755 testsuite/expect/test21.14 create mode 100755 testsuite/expect/test21.15 create mode 
100755 testsuite/expect/test21.16 create mode 100755 testsuite/expect/test21.17 create mode 100755 testsuite/expect/test21.18 create mode 100755 testsuite/expect/test21.19 create mode 100755 testsuite/expect/test7.11 create mode 100644 testsuite/expect/test7.11.prog.c diff --git a/AUTHORS b/AUTHORS index 570f3dc3f..657216e85 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,6 +9,7 @@ Daniel Christians <Daniel.Christians(at)hp.com> Gilles Civario <gilles.civario(at)bull.net> Chris Dunlap <cdunlap(at)llnl.gov> Joey Ekstrom <ekstrom1(at)llnl.gov> +Kent Engstrom <kent(at>snc.liu.se> Jim Garlick <garlick(at)llnl.gov> Didier Gazen <gazdi(at)aero.obs-mip.fr> Mark Grondona <mgrondona(at)llnl.gov> diff --git a/META b/META index b498c1bb5..6202ef8e5 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 6 + Micro: 7 Minor: 3 Name: slurm Release: 1 - Release_tags: - Version: 1.3.6 + Release_tags: dist + Version: 1.3.7 diff --git a/NEWS b/NEWS index da0360eb0..df2024005 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,53 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 1.3.7 +======================== + -- Add jobid/stepid to MESSAGE_TASK_EXIT to address race condition when + a job step is cancelled, another is started immediately (before the + first one completely terminates) and ports are reused. + NOTE: This change requires that SLURM be updated on all nodes of the + cluster at the same time. There will be no impact upon currently running + jobs (they will ignore the jobid/stepid at the end of the message). + -- Added Python module to process hostslists as used by SLURM. See + contribs/python/hostlist. Supplied by Kent Engstrom, National + Supercomputer Centre, Sweden. + -- Report task termination due to signal (restored functionality present + in slurm v1.2). + -- Remove sbatch test for script size being no larger than 64k bytes. 
+ The current limit is 4GB. + -- Disable FastSchedule=0 use with SchedulerType=sched/gang. Node + configuration must be specified in slurm.conf for gang scheduling now. + -- For sched/wiki and sched/wiki2 (Maui or Moab scheduler) disable the ability + of a non-root user to change a job's comment field (used by Maui/Moab for + storing scheduler state information). + -- For sched/wiki (Maui) add pending job's future start time to the state + info reported to Maui. + -- Improve reliability of job requeue logic on node failure. + -- Add logic to ping non-responsive nodes even if SlurmdTimeout=0. This permits + the node to be returned to use when it starts responding rather than + remaining in a non-usable state. + -- Honor HealthCheckInterval values that are smaller than SlurmdTimeout. + -- For non-responding nodes, log them all on a single line with a hostlist + expression rather than one line per node. Frequency of log messages is + dependent upon SlurmctldDebug value from 300 seconds at SlurmctldDebug<=3 + to 1 second at SlurmctldDebug>=5. + -- If a DOWN node is resumed, set its state to IDLE & NOT_RESPONDING and + ping the node immediately to clear the NOT_RESPONDING flag. + -- Log that a job's time limit is reached, but don't sent SIGXCPU. + -- Fixed gid to be set in slurmstepd when run by root + -- Changed getpwent to getpwent_r in the slurmctld and slurmd + -- Increase timeout on most slurmdbd communications to 60 secs (time for + substantial database updates). + -- Treat srun option of --begin= with a value of now without a numeric + component as a failure (e.g. "--begin=now+hours"). + -- Eliminate a memory leak associated with notifying srun of allocated + nodes having failed. + -- Add scontrol shutdown option of "slurmctld" to just shutdown the + slurmctld daemon and leave the slurmd daemons running. + -- Do not require JobCredentialPrivateKey or JobCredentialPublicCertificate + in slurm.conf if using CryptoType=crypto/munge. 
+ * Changes in SLURM 1.3.6 ======================== -- Add new function to get information for a single job rather than always @@ -403,6 +450,26 @@ documents those changes that are of interest to users and admins. Moved existing digital signature logic into new plugin: crypto/openssl. Added new support for crypto/munge (available with GPL license). +* Changes in SLURM 1.2.34 +========================= + -- Permit the cancellation of a job that is in the process of being + requeued. + -- Ignore the show_flag when getting job, step, node or partition information + for user root. + -- Convert some functions to thread-safe versions: getpwnam, getpwuid, + getgrnam, and getgrgid to similar functions with "_r" suffix. While no + failures have been observed, a race condition would in the worst case + permit a user access to a partition not normally allowed due to the + AllowGroup specification or the wrong user identified in an accounting + record. The job would NOT be run as the wrong user. + -- For PMI only (MPICH2/MVAPICH2) base address to send messages to (the srun) + upon the address from which slurmd gets the task launch request rather then + "hostname" where srun executes. + -- Make test for StateSaveLocation directory more comprehensive. + -- For jobcomp/script plugin, PROCS environment variable is now the actual + count of allocated processors rather than the count of processes to + be started. + * Changes in SLURM 1.2.33 ========================= -- Cancelled or Failed jobs will now report their job and step id on exit @@ -3369,4 +3436,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. 
-$Id: NEWS 14583 2008-07-21 17:18:13Z da $ +$Id: NEWS 14883 2008-08-25 21:36:27Z jette $ diff --git a/auxdir/x_ac_gtk.m4 b/auxdir/x_ac_gtk.m4 index ecdf3cf47..7ab71b2d0 100644 --- a/auxdir/x_ac_gtk.m4 +++ b/auxdir/x_ac_gtk.m4 @@ -46,7 +46,7 @@ AC_DEFUN([X_AC_GTK], # fi -### Check for gtk2.6 package +### Check for gtk2.7.1 package if test "$ac_have_gtk" == "yes" ; then $HAVEPKGCONFIG --exists gtk+-2.0 if ! test $? -eq 0 ; then @@ -60,8 +60,8 @@ AC_DEFUN([X_AC_GTK], gtk_config_micro_version=`$HAVEPKGCONFIG --modversion gtk+-2.0 | \ sed 's/\([[0-9]]*\).\([[0-9]]*\).\([[0-9]]*\)/\3/'` - if test $gtk_config_major_version -lt 2 || test $gtk_config_minor_version -lt 6 ; then - AC_MSG_WARN([*** gtk+-$gtk_config_major_version.$gtk_config_minor_version.$gtk_config_micro_version available, we need >= gtk+-2.6.0 installed for sview.]) + if test $gtk_config_major_version -lt 2 || test $gtk_config_minor_version -lt 7 || test $gtk_config_micro_version -lt 1; then + AC_MSG_WARN([*** gtk+-$gtk_config_major_version.$gtk_config_minor_version.$gtk_config_micro_version available, we need >= gtk+-2.7.1 installed for sview.]) ac_have_gtk="no" fi fi diff --git a/configure b/configure index 785c74f70..00ef5f35a 100755 --- a/configure +++ b/configure @@ -25271,7 +25271,7 @@ echo "$as_me: WARNING: *** pkg-config not found. Cannot probe for libglade-2.0 o # fi -### Check for gtk2.6 package +### Check for gtk2.8 package if test "$ac_have_gtk" == "yes" ; then $HAVEPKGCONFIG --exists gtk+-2.0 if ! test $? -eq 0 ; then @@ -25286,9 +25286,9 @@ echo "$as_me: WARNING: *** gtk+-2.0 is not available." >&2;} gtk_config_micro_version=`$HAVEPKGCONFIG --modversion gtk+-2.0 | \ sed 's/\([0-9]*\).\([0-9]*\).\([0-9]*\)/\3/'` - if test $gtk_config_major_version -lt 2 || test $gtk_config_minor_version -lt 6 ; then - { echo "$as_me:$LINENO: WARNING: *** gtk+-$gtk_config_major_version.$gtk_config_minor_version.$gtk_config_micro_version available, we need >= gtk+-2.6.0 installed for sview." 
>&5 -echo "$as_me: WARNING: *** gtk+-$gtk_config_major_version.$gtk_config_minor_version.$gtk_config_micro_version available, we need >= gtk+-2.6.0 installed for sview." >&2;} + if test $gtk_config_major_version -lt 2 || test $gtk_config_minor_version -lt 7 || test $gtk_config_micro_version -lt 1; then + { echo "$as_me:$LINENO: WARNING: *** gtk+-$gtk_config_major_version.$gtk_config_minor_version.$gtk_config_micro_version available, we need >= gtk+-2.7.1 installed for sview." >&5 +echo "$as_me: WARNING: *** gtk+-$gtk_config_major_version.$gtk_config_minor_version.$gtk_config_micro_version available, we need >= gtk+-2.7.1 installed for sview." >&2;} ac_have_gtk="no" fi fi @@ -27196,7 +27196,7 @@ _ACEOF -ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 src/Makefile src/api/Makefile src/common/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/gold/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile 
src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile 
testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile" +ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 contribs/python/Makefile contribs/python/hostlist/Makefile contribs/python/hostlist/test/Makefile src/Makefile src/api/Makefile src/common/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/gold/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile 
src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile" cat >confcache <<\_ACEOF @@ -27921,6 +27921,9 @@ do "contribs/torque/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/torque/Makefile" ;; "contribs/phpext/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/phpext/Makefile" ;; "contribs/phpext/slurm_php/config.m4") CONFIG_FILES="$CONFIG_FILES contribs/phpext/slurm_php/config.m4" ;; + "contribs/python/Makefile") 
CONFIG_FILES="$CONFIG_FILES contribs/python/Makefile" ;; + "contribs/python/hostlist/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/python/hostlist/Makefile" ;; + "contribs/python/hostlist/test/Makefile") CONFIG_FILES="$CONFIG_FILES contribs/python/hostlist/test/Makefile" ;; "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;; "src/api/Makefile") CONFIG_FILES="$CONFIG_FILES src/api/Makefile" ;; "src/common/Makefile") CONFIG_FILES="$CONFIG_FILES src/common/Makefile" ;; diff --git a/configure.ac b/configure.ac index d2c076132..cddb33cde 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -# $Id: configure.ac 14598 2008-07-21 20:12:45Z da $ +# $Id: configure.ac 14645 2008-07-28 20:32:12Z jette $ # This file is to be processed with autoconf to generate a configure script dnl Prologue @@ -278,6 +278,9 @@ AC_CONFIG_FILES([Makefile contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 + contribs/python/Makefile + contribs/python/hostlist/Makefile + contribs/python/hostlist/test/Makefile src/Makefile src/api/Makefile src/common/Makefile diff --git a/contribs/Makefile.am b/contribs/Makefile.am index 74bb21836..7bf4cef72 100644 --- a/contribs/Makefile.am +++ b/contribs/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = perlapi torque +SUBDIRS = perlapi python torque EXTRA_DIST = \ env_cache_builder.c \ diff --git a/contribs/Makefile.in b/contribs/Makefile.in index 4040f4ef3..a3bfe7509 100644 --- a/contribs/Makefile.in +++ b/contribs/Makefile.in @@ -245,7 +245,7 @@ target_os = @target_os@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = perlapi torque +SUBDIRS = perlapi python torque EXTRA_DIST = \ env_cache_builder.c \ make.slurm.patch \ diff --git a/contribs/README b/contribs/README index c9792f45d..99b5bc42e 100644 --- a/contribs/README +++ b/contribs/README @@ -18,6 +18,9 @@ of the SLURM contribs distribution follows: API to SLURM using perl. 
Making available all SLURM command that exist in the SLURM proper API. + python/ [Python modules] + Directory for Python modules. + time_login.c [ C program ] This program will report how long a pseudo-login will take for specific users or all users on the system. Users identified by this program @@ -44,6 +47,8 @@ of the SLURM contribs distribution follows: modification. ptrace.patch [ Linux Kernel patch required to for TotalView use ] + 0. This has been fixed on most recent Linux kernels. Older versions of + Linux may need this patch support TotalView. 1. gdb and other tools cannot attach to a stopped process. The wait that follows the PTRACE_ATTACH will block indefinitely. 2. It is not possible to use PTRACE_DETACH to leave a process stopped, diff --git a/contribs/ptrace.patch b/contribs/ptrace.patch index d64c332f8..f947336bc 100644 --- a/contribs/ptrace.patch +++ b/contribs/ptrace.patch @@ -1,6 +1,6 @@ <http://linux.llnl.gov/software/kernel/ptrace.html> -CHAOS kernels must implement ptrace semantics required by the TotalView +The Linux kernels must implement ptrace semantics required by the TotalView debugger. In order to initiate a parallel job under debugger control, a resource manager or job launch utility must be able to start all tasks in a stopped state, notify TotalView, and then allow TotalView debugger @@ -9,7 +9,8 @@ servers to attach to all tasks. This functionality requires the ability to: * Detach from a traced process and leave the process stopped * Attach to a stopped process -Under Linux (CHAOS 1.2), both of the above are impossible without the +Most newer versions of the Linux kernel support this functionality. 
For +some older Linux kernels, both of the above are impossible without the following patch by Vic Zandy: * initial posting <http://seclists.org/linux-kernel/2001/Dec/5146.html> diff --git a/contribs/python/Makefile.am b/contribs/python/Makefile.am new file mode 100644 index 000000000..60fb9a077 --- /dev/null +++ b/contribs/python/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = hostlist diff --git a/contribs/python/Makefile.in b/contribs/python/Makefile.in new file mode 100644 index 000000000..54f0bb278 --- /dev/null +++ b/contribs/python/Makefile.in @@ -0,0 +1,563 @@ +# Makefile.in generated by automake 1.10.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = contribs/python +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_elan.m4 \ + $(top_srcdir)/auxdir/x_ac_federation.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_gtk.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = all-recursive check-recursive 
dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BG_INCLUDES = @BG_INCLUDES@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ELAN_LIBS = @ELAN_LIBS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@ +FFLAGS = @FFLAGS@ +GREP = @GREP@ +GTK2_CFLAGS = @GTK2_CFLAGS@ +GTK2_LIBS = @GTK2_LIBS@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVEPKGCONFIG = @HAVEPKGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_ELAN = @HAVE_ELAN@ +HAVE_FEDERATION = @HAVE_FEDERATION@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ 
+MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PLPA_LIBS = @PLPA_LIBS@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +RELEASE = @RELEASE@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_VERSION = @SLURM_VERSION@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ 
+builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = hostlist +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu contribs/python/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu contribs/python/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + 
+distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-exec-am: + +install-html: install-html-recursive + +install-info: install-info-recursive + +install-man: + +install-pdf: install-pdf-recursive + +install-ps: install-ps-recursive + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ + install-strip + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + 
maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/contribs/python/hostlist/COPYING b/contribs/python/hostlist/COPYING new file mode 100644 index 000000000..d511905c1 --- /dev/null +++ b/contribs/python/hostlist/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. 
+These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/contribs/python/hostlist/Makefile.am b/contribs/python/hostlist/Makefile.am new file mode 100644 index 000000000..244712630 --- /dev/null +++ b/contribs/python/hostlist/Makefile.am @@ -0,0 +1,8 @@ +SUBDIRS = test + +EXTRA_DIST = \ + COPYING \ + hostlist.py \ + PKG-INFO \ + README \ + setup.py diff --git a/contribs/python/hostlist/Makefile.in b/contribs/python/hostlist/Makefile.in new file mode 100644 index 000000000..064125ec9 --- /dev/null +++ b/contribs/python/hostlist/Makefile.in @@ -0,0 +1,571 @@ +# Makefile.in generated by automake 1.10.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = contribs/python/hostlist +DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ + COPYING +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_elan.m4 \ + $(top_srcdir)/auxdir/x_ac_federation.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_gtk.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +SOURCES = +DIST_SOURCES = +RECURSIVE_TARGETS = 
all-recursive check-recursive dvi-recursive \ + html-recursive info-recursive install-data-recursive \ + install-dvi-recursive install-exec-recursive \ + install-html-recursive install-info-recursive \ + install-pdf-recursive install-ps-recursive install-recursive \ + installcheck-recursive installdirs-recursive pdf-recursive \ + ps-recursive uninstall-recursive +RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \ + distclean-recursive maintainer-clean-recursive +ETAGS = etags +CTAGS = ctags +DIST_SUBDIRS = $(SUBDIRS) +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BG_INCLUDES = @BG_INCLUDES@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ELAN_LIBS = @ELAN_LIBS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@ +FFLAGS = @FFLAGS@ +GREP = @GREP@ +GTK2_CFLAGS = @GTK2_CFLAGS@ +GTK2_LIBS = @GTK2_LIBS@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVEPKGCONFIG = @HAVEPKGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_ELAN = @HAVE_ELAN@ +HAVE_FEDERATION = @HAVE_FEDERATION@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = 
@MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PLPA_LIBS = @PLPA_LIBS@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +RELEASE = @RELEASE@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_VERSION = @SLURM_VERSION@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ 
+build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +SUBDIRS = test +EXTRA_DIST = \ + COPYING \ + hostlist.py \ + PKG-INFO \ + README \ + setup.py + +all: all-recursive + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu contribs/python/hostlist/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu contribs/python/hostlist/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +# This directory's subdirectories are mostly independent; you can cd +# into them and run `make' without going through this Makefile. +# To change the values of `make' variables: instead of editing Makefiles, +# (1) if the variable is set in `config.status', edit `config.status' +# (which will cause the Makefiles to be regenerated when you run `make'); +# (2) otherwise, pass the desired values on the `make' command line. 
+$(RECURSIVE_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + target=`echo $@ | sed s/-recursive//`; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + dot_seen=yes; \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done; \ + if test "$$dot_seen" = "no"; then \ + $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ + fi; test -z "$$fail" + +$(RECURSIVE_CLEAN_TARGETS): + @failcom='exit 1'; \ + for f in x $$MAKEFLAGS; do \ + case $$f in \ + *=* | --[!k]*);; \ + *k*) failcom='fail=yes';; \ + esac; \ + done; \ + dot_seen=no; \ + case "$@" in \ + distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \ + *) list='$(SUBDIRS)' ;; \ + esac; \ + rev=''; for subdir in $$list; do \ + if test "$$subdir" = "."; then :; else \ + rev="$$subdir $$rev"; \ + fi; \ + done; \ + rev="$$rev ."; \ + target=`echo $@ | sed s/-recursive//`; \ + for subdir in $$rev; do \ + echo "Making $$target in $$subdir"; \ + if test "$$subdir" = "."; then \ + local_target="$$target-am"; \ + else \ + local_target="$$target"; \ + fi; \ + (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ + || eval $$failcom; \ + done && test -z "$$fail" +tags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . || (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ + done +ctags-recursive: + list='$(SUBDIRS)'; for subdir in $$list; do \ + test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) ctags); \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: tags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \ + include_option=--etags-include; \ + empty_fix=.; \ + else \ + include_option=--include; \ + empty_fix=; \ + fi; \ + list='$(SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test ! -f $$subdir/TAGS || \ + tags="$$tags $$include_option=$$here/$$subdir/TAGS"; \ + fi; \ + done; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: ctags-recursive $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + 
+distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done + list='$(DIST_SUBDIRS)'; for subdir in $$list; do \ + if test "$$subdir" = .; then :; else \ + test -d "$(distdir)/$$subdir" \ + || $(MKDIR_P) "$(distdir)/$$subdir" \ + || exit 1; \ + distdir=`$(am__cd) $(distdir) && pwd`; \ + top_distdir=`$(am__cd) $(top_distdir) && pwd`; \ + (cd $$subdir && \ + $(MAKE) $(AM_MAKEFLAGS) \ + top_distdir="$$top_distdir" \ + distdir="$$distdir/$$subdir" \ + am__remove_distdir=: \ + am__skip_length_check=: \ + distdir) \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-recursive +all-am: Makefile +installdirs: installdirs-recursive +installdirs-am: +install: install-recursive +install-exec: install-exec-recursive +install-data: install-data-recursive +uninstall: uninstall-recursive + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-recursive +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + 
install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-recursive + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-recursive + -rm -f Makefile +distclean-am: clean-am distclean-generic distclean-tags + +dvi: dvi-recursive + +dvi-am: + +html: html-recursive + +info: info-recursive + +info-am: + +install-data-am: + +install-dvi: install-dvi-recursive + +install-exec-am: + +install-html: install-html-recursive + +install-info: install-info-recursive + +install-man: + +install-pdf: install-pdf-recursive + +install-ps: install-ps-recursive + +installcheck-am: + +maintainer-clean: maintainer-clean-recursive + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-recursive + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-recursive + +pdf-am: + +ps: ps-recursive + +ps-am: + +uninstall-am: + +.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) install-am \ + install-strip + +.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \ + all all-am check check-am clean clean-generic clean-libtool \ + ctags ctags-recursive distclean distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-man install-pdf install-pdf-am \ + install-ps install-ps-am install-strip installcheck \ + installcheck-am installdirs installdirs-am maintainer-clean \ + 
maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \ + uninstall uninstall-am + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/contribs/python/hostlist/PKG-INFO b/contribs/python/hostlist/PKG-INFO new file mode 100644 index 000000000..79b0649c9 --- /dev/null +++ b/contribs/python/hostlist/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: python-hostlist +Version: 1.0 +Summary: Python module for hostlist handling +Home-page: http://www.nsc.liu.se/~kent/python-hostlist/ +Author: Kent Engström +Author-email: kent@nsc.liu.se +License: GPL2+ +Description: The hostlist.py module knows how to expand and collect LLNL hostlist expressions. +Platform: UNKNOWN diff --git a/contribs/python/hostlist/README b/contribs/python/hostlist/README new file mode 100644 index 000000000..03fa29342 --- /dev/null +++ b/contribs/python/hostlist/README @@ -0,0 +1,43 @@ +The Python module hostlist.py knows how to expand and collect +LLNL hostlist expressions. Example: + + % python + Python 2.5.1 (r251:54863, Jul 10 2008, 17:24:48) + [GCC 4.1.2 20070925 (Red Hat 4.1.2-33)] on linux2 + Type "help", "copyright", "credits" or "license" for more information. + >>> import hostlist + >>> hosts = hostlist.expand_hostlist("n[1-10,17]") + >>> hosts + ['n1', 'n2', 'n3', 'n4', 'n5', 'n6', 'n7', 'n8', 'n9', 'n10', 'n17'] + >>> hostlist.collect_hostlist(hosts) + 'n[1-10,17]' + >>> hostlist.collect_hostlist(["x1y1","x2y2", "x1y2", "x2y1"]) + 'x[1-2]y[1-2]' + +Bad hostlists or hostnames will result in the hostlist.BadHostlist +exception being raised. + +If you invoke hostlist.py from the command line, it doubles as a utility +for doing set operations on hostlists. 
Example: + + % ./hostlist.py n[1-10] n[5-20] + n[1-20] + % ./hostlist.py --difference n[1-10] n[5-20] + n[1-4] + % ./hostlist.py --expand --intersection n[1-10] n[5-20] + n5 + n6 + n7 + n8 + n9 + n10 + +Install by running + + python setup.py build (as yourself) + python setup.py install (as root) + +or just copy the hostlist.py file to an appropriate place. + +If you have questions, suggestions, bug reports or patches, please send them +to kent@nsc.liu.se. diff --git a/contribs/python/hostlist/hostlist.py b/contribs/python/hostlist/hostlist.py new file mode 100755 index 000000000..9e6a2c594 --- /dev/null +++ b/contribs/python/hostlist/hostlist.py @@ -0,0 +1,426 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Hostlist library and utility +# +# Version 1.0 +# +# Copyright (C) 2008 Kent Engström <kent@nsc.liu.se> and +# Thomas Bellman <bellman@nsc.liu.se>, +# National Supercomputer Centre +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +# WARNING: The behaviour in corner cases have not been compared for +# compatibility with pdsh/dshbak/SLURM et al. 
+ +import re +import itertools + +# Exception used for error reporting to the caller +class BadHostlist(Exception): pass + +# Configuration to guard against ridiculously long expanded lists +MAX_SIZE = 100000 + +# Hostlist expansion + +def expand_hostlist(hostlist, allow_duplicates=False, sort=False): + """Expand a Livermore hostlist string to a Python list. + + Exemple: expand_hostlist("n[9-11],d[01-02]") ==> + ['n9', 'n10', 'n11', 'd01', 'd02'] + + Unless allow_duplicates is true, duplicates will be purged + from the results. If sort is true, the output will be sorted. + """ + + results = [] + bracket_level = 0 + part = "" + + for c in hostlist + ",": + if c == "," and bracket_level == 0: + # Comma at top level, split! + if part: results.extend(expand_part(part)) + part = "" + bad_part = False + else: + part += c + + if c == "[": bracket_level += 1 + elif c == "]": bracket_level -= 1 + + if bracket_level > 1: + raise BadHostlist, "nested brackets" + elif bracket_level < 0: + raise BadHostlist, "unbalanced brackets" + + if bracket_level > 0: + raise BadHostlist, "unbalanced brackets" + + if not allow_duplicates: + results = remove_duplicates(results) + if sort: + results = numerically_sorted(results) + return results + +def expand_part(s): + """Expand a part (e.g. "x[1-2]y[1-3][1-3]") (no outer level commas).""" + + # Base case: the empty part expand to the singleton list of "" + if s == "": + return [""] + + # Split into: + # 1) prefix string (may be empty) + # 2) rangelist in brackets (may be missing) + # 3) the rest + + m = re.match(r'([^,\[]*)(\[[^\]]*\])?(.*)', s) + (prefix, rangelist, rest) = m.group(1,2,3) + + # Expand the rest first (here is where we recurse!) 
+ rest_expanded = expand_part(rest) + + # Expand our own part + if not rangelist: + # If there is no rangelist, our own contribution is the prefix only + us_expanded = [prefix] + else: + # Otherwise expand the rangelist (adding the prefix before) + us_expanded = expand_rangelist(prefix, rangelist[1:-1]) + + # Combine our list with the list from the expansion of the rest + # (but guard against too large results first) + if len(us_expanded) * len(rest_expanded) > MAX_SIZE: + raise BadHostlist, "results too large" + + return [us_part + rest_part + for us_part in us_expanded + for rest_part in rest_expanded] + +def expand_rangelist(prefix, rangelist): + """ Expand a rangelist (e.g. "1-10,14"), putting a prefix before.""" + + # Split at commas and expand each range separately + results = [] + for range_ in rangelist.split(","): + results.extend(expand_range(prefix, range_)) + return results + +def expand_range(prefix, range_): + """ Expand a range (e.g. 1-10 or 14), putting a prefix before.""" + + # Check for a single number first + m = re.match(r'^[0-9]+$', range_) + if m: + return ["%s%s" % (prefix, range_)] + + # Otherwise split low-high + m = re.match(r'^([0-9]+)-([0-9]+)$', range_) + if not m: + raise BadHostlist, "bad range" + + (s_low, s_high) = m.group(1,2) + low = int(s_low) + high = int(s_high) + width = len(s_low) + + if high < low: + raise BadHostlist, "start > stop" + elif high - low > MAX_SIZE: + raise BadHostlist, "range too large" + + results = [] + for i in xrange(low, high+1): + results.append("%s%0*d" % (prefix, width, i)) + return results + +def remove_duplicates(l): + """Remove duplicates from a list (but keep the order).""" + seen = set() + results = [] + for e in l: + if e not in seen: + results.append(e) + seen.add(e) + return results + +# Hostlist collection + +def collect_hostlist(hosts, silently_discard_bad = False): + """Collect a hostlist string from a Python list of hosts. + + We start grouping from the rightmost numerical part. 
+ Duplicates are removed. + + A bad hostname raises an exception (unless silently_discard_bad + is true causing the bad hostname to be silently discarded instead). + """ + + # Split hostlist into a list of (host, "") for the iterative part. + # (Also check for bad node names now) + # The idea is to move already collected numerical parts from the + # left side (seen by each loop) to the right side (just copied). + + left_right = [] + for host in hosts: + # We remove leading and trailing whitespace first, and skip empty lines + host = host.strip() + if host == "": continue + + # We cannot accept a host containing any of the three special + # characters in the hostlist syntax (comma and flat brackets) + if re.search(r'[][,]', host): + if silently_discard_bad: + continue + else: + raise BadHostlist, "forbidden character" + + left_right.append((host, "")) + + # Call the iterative function until it says it's done + looping = True + while looping: + left_right, looping = collect_hostlist_1(left_right) + return ",".join([left + right for left, right in left_right]) + +def collect_hostlist_1(left_right): + """Collect a hostlist string from a list of hosts (left+right). + + The input is a list of tuples (left, right). The left part + is analyzed, while the right part is just passed along + (it can contain already collected range expressions). + """ + + # Scan the list of hosts (left+right) and build two things: + # *) a set of all hosts seen (used later) + # *) a list where each host entry is preprocessed for correct sorting + + sortlist = [] + remaining = set() + for left, right in left_right: + host = left + right + remaining.add(host) + + # Match the left part into parts + m = re.match(r'^(.*?)([0-9]+)?([^0-9]*)$', left) + (prefix, num_str, suffix) = m.group(1,2,3) + + # Add the right part unprocessed to the suffix. + # This ensures than an already computed range expression + # in the right part is not analyzed again. 
+ suffix = suffix + right + + if num_str is None: + # A left part with no numeric part at all gets special treatment! + # The regexp matches with the whole string as the suffix, + # with nothing in the prefix or numeric parts. + # We do not want that, so we move it to the prefix and put + # None as a special marker where the suffix should be. + assert prefix == "" + sortlist.append(((host, None), None, None, host)) + else: + # A left part with at least an numeric part + # (we care about the rightmost numeric part) + num_int = int(num_str) + num_width = len(num_str) # This width includes leading zeroes + sortlist.append(((prefix, suffix), num_int, num_width, host)) + + # Sort lexicographically, first on prefix, then on suffix, then on + # num_int (numerically), then... + # This determines the order of the final result. + + sortlist.sort() + + # We are ready to collect the result parts as a list of new (left, + # right) tuples. + + results = [] + needs_another_loop = False + + # Now group entries with the same prefix+suffix combination (the + # key is the first element in the sortlist) to loop over them and + # then to loop over the list of hosts sharing the same + # prefix+suffix combination. + + for ((prefix, suffix), group) in itertools.groupby(sortlist, + key=lambda x:x[0]): + + if suffix is None: + # Special case: a host with no numeric part + results.append(("", prefix)) # Move everything to the right part + remaining.remove(prefix) + else: + # The general case. We prepare to collect a list of + # ranges expressed as (low, high, width) for later + # formatting. + range_list = [] + + for ((prefix2, suffix2), num_int, num_width, host) in group: + if host not in remaining: + # Below, we will loop internally to enumate a whole range + # at a time. We then remove the covered hosts from the set. + # Therefore, skip the host here if it is gone from the set. 
+ continue + assert num_int is not None + + # Scan for a range starting at the current host + low = num_int + while True: + host = "%s%0*d%s" % (prefix, num_width, num_int, suffix) + if host in remaining: + remaining.remove(host) + num_int += 1 + else: + break + high = num_int - 1 + assert high >= low + range_list.append((low, high, num_width)) + + # We have a list of ranges to format. We make sure + # we move our handled numerical part to the right to + # stop it from being processed again. + needs_another_loop = True + if len(range_list) == 1 and range_list[0][0] == range_list[0][1]: + # Special case to make sure that n1 is not shown as n[1] etc + results.append((prefix, + "%0*d%s" % + (range_list[0][2], range_list[0][0], suffix))) + else: + # General case where high > low + results.append((prefix, "[" + \ + ",".join([format_range(l, h, w) + for l, h, w in range_list]) + \ + "]" + suffix)) + + # At this point, the set of remaining hosts should be empty and we + # are ready to return the result, together with the flag that says + # if we need to loop again (we do if we have added something to a + # left part). + assert not remaining + return results, needs_another_loop + +def format_range(low, high, width): + """Format a range from low to high inclusively, with a certain width.""" + + if low == high: + return "%0*d" % (width, low) + else: + return "%0*d-%0*d" % (width, low, width, high) + +# Sort a list of hosts numerically + +def numerically_sorted(l): + """Sort a list of hosts numerically. + + E.g. sorted order should be n1, n2, n10; not n1, n10, n2. + """ + + return sorted(l, key=numeric_sort_key) + +def int_if_possible(x): + try: + return int(x) + except: + return x + +def numeric_sort_key(x): + return [int_if_possible(n) for n in re.findall("([0-9]+|[^0-9]+)", x)] + + +# +# The library stuff ends here. 
Now lets do something useful +# when called from the command line too :-) +# + +if __name__ == '__main__': + import optparse + import sys + import operator + import os + + def func_union(args): + return reduce(operator.or_, args) + + def func_intersection(args): + return reduce(operator.and_, args) + + def func_difference(args): + return reduce(operator.sub, args) + + def func_xor(args): + return reduce(operator.xor, args) + + op = optparse.OptionParser(usage="usage: %prog [options] {hostlist arguments}") + op.add_option("-u", "--union", + action="store_const", dest="func", const=func_union, + default=func_union, + help="compute the union of the hostlist arguments (default)") + op.add_option("-i", "--intersection", + action="store_const", dest="func", const=func_intersection, + help="compute the intersection of the hostlist arguments") + op.add_option("-d", "--difference", + action="store_const", dest="func", const=func_difference, + help="compute the difference between the first hostlist argument and the rest") + op.add_option("-x", "--symmetric-difference", + action="store_const", dest="func", const=func_xor, + help="compute the symmetric difference between the first hostlist argument and the rest") + op.add_option("-w", "--expand", + action="store_true", + help="output the results as an expanded list") + op.add_option("-c", "--collapse", + action="store_false", dest="expand", + help="output the results as a hostlist expression (default)") + op.add_option("-n", "--count", + action="store_true", + help="output the number of hosts instead of a hostlist") + (opts, args) = op.parse_args() + + func = opts.func + + func_args = [] + + try: + for a in args: + if a == "-": + for a in sys.stdin.read().split(): + func_args.append(set(expand_hostlist(a))) + else: + func_args.append(set(expand_hostlist(a))) + except BadHostlist, e: + sys.stderr.write("Bad hostlist ``%s'' encountered: %s\n" + % ((a,) + e.args)) + sys.exit(os.EX_DATAERR) + + if not func_args: + op.print_help() 
+ sys.exit(os.EX_USAGE) + + res = func(func_args) + + if opts.count: + print len(res) + elif opts.expand: + for host in numerically_sorted(res): + print host + else: + try: + print collect_hostlist(res) + except BadHostlist, e: + sys.stderr.write("Bad hostname encountered: %s\n" % e.args) + sys.exit(os.EX_DATAERR) diff --git a/contribs/python/hostlist/setup.py b/contribs/python/hostlist/setup.py new file mode 100644 index 000000000..87acae2f0 --- /dev/null +++ b/contribs/python/hostlist/setup.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- + +from distutils.core import setup + +setup(name = "python-hostlist", + version = "1.0", # Change comment in hostlist.py too! + description = "Python module for hostlist handling", + long_description = "The hostlist.py module knows how to expand and collect LLNL hostlist expressions.", + author = "Kent Engström", + author_email = "kent@nsc.liu.se", + url = "http://www.nsc.liu.se/~kent/python-hostlist/", + license = "GPL2+", + py_modules = ["hostlist"], + ) diff --git a/contribs/python/hostlist/test/Makefile.am b/contribs/python/hostlist/test/Makefile.am new file mode 100644 index 000000000..c795271fb --- /dev/null +++ b/contribs/python/hostlist/test/Makefile.am @@ -0,0 +1,2 @@ +EXTRA_DIST = \ + test_hostlist.py diff --git a/contribs/python/hostlist/test/Makefile.in b/contribs/python/hostlist/test/Makefile.in new file mode 100644 index 000000000..118f5d3a1 --- /dev/null +++ b/contribs/python/hostlist/test/Makefile.in @@ -0,0 +1,407 @@ +# Makefile.in generated by automake 1.10.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. 
+ +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +subdir = contribs/python/hostlist/test +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ + $(top_srcdir)/auxdir/slurm.m4 \ + $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ + $(top_srcdir)/auxdir/x_ac_affinity.m4 \ + $(top_srcdir)/auxdir/x_ac_aix.m4 \ + $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ + $(top_srcdir)/auxdir/x_ac_cflags.m4 \ + $(top_srcdir)/auxdir/x_ac_databases.m4 \ + $(top_srcdir)/auxdir/x_ac_debug.m4 \ + $(top_srcdir)/auxdir/x_ac_elan.m4 \ + $(top_srcdir)/auxdir/x_ac_federation.m4 \ + $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ + $(top_srcdir)/auxdir/x_ac_gtk.m4 \ + $(top_srcdir)/auxdir/x_ac_munge.m4 \ + $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ + $(top_srcdir)/auxdir/x_ac_pam.m4 \ + $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ + $(top_srcdir)/auxdir/x_ac_readline.m4 \ + $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ + $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ + $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ + $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ + $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) 
$(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h +CONFIG_CLEAN_FILES = +SOURCES = +DIST_SOURCES = +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTHD_CFLAGS = @AUTHD_CFLAGS@ +AUTHD_LIBS = @AUTHD_LIBS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BG_INCLUDES = @BG_INCLUDES@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CMD_LDFLAGS = @CMD_LDFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DSYMUTIL = @DSYMUTIL@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ELAN_LIBS = @ELAN_LIBS@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@ +FFLAGS = @FFLAGS@ +GREP = @GREP@ +GTK2_CFLAGS = @GTK2_CFLAGS@ +GTK2_LIBS = @GTK2_LIBS@ +HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ +HAVEPGCONFIG = @HAVEPGCONFIG@ +HAVEPKGCONFIG = @HAVEPKGCONFIG@ +HAVE_AIX = @HAVE_AIX@ +HAVE_ELAN = @HAVE_ELAN@ +HAVE_FEDERATION = @HAVE_FEDERATION@ +HAVE_OPENSSL = @HAVE_OPENSSL@ +HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIB_LDFLAGS = @LIB_LDFLAGS@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ +MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ +MUNGE_LIBS = @MUNGE_LIBS@ +MYSQL_CFLAGS = @MYSQL_CFLAGS@ +MYSQL_LIBS = @MYSQL_LIBS@ +NCURSES = @NCURSES@ +NMEDIT = @NMEDIT@ +NUMA_LIBS = @NUMA_LIBS@ +OBJEXT = @OBJEXT@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ 
+PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PAM_LIBS = @PAM_LIBS@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PGSQL_CFLAGS = @PGSQL_CFLAGS@ +PGSQL_LIBS = @PGSQL_LIBS@ +PLPA_LIBS = @PLPA_LIBS@ +PROCTRACKDIR = @PROCTRACKDIR@ +PROJECT = @PROJECT@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +READLINE_LIBS = @READLINE_LIBS@ +RELEASE = @RELEASE@ +SED = @SED@ +SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ +SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLURMCTLD_PORT = @SLURMCTLD_PORT@ +SLURMDBD_PORT = @SLURMDBD_PORT@ +SLURMD_PORT = @SLURMD_PORT@ +SLURM_API_AGE = @SLURM_API_AGE@ +SLURM_API_CURRENT = @SLURM_API_CURRENT@ +SLURM_API_MAJOR = @SLURM_API_MAJOR@ +SLURM_API_REVISION = @SLURM_API_REVISION@ +SLURM_API_VERSION = @SLURM_API_VERSION@ +SLURM_MAJOR = @SLURM_MAJOR@ +SLURM_MICRO = @SLURM_MICRO@ +SLURM_MINOR = @SLURM_MINOR@ +SLURM_VERSION = @SLURM_VERSION@ +SO_LDFLAGS = @SO_LDFLAGS@ +SSL_CPPFLAGS = @SSL_CPPFLAGS@ +SSL_LDFLAGS = @SSL_LDFLAGS@ +SSL_LIBS = @SSL_LIBS@ +STRIP = @STRIP@ +UTIL_LIBS = @UTIL_LIBS@ +VERSION = @VERSION@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = 
@includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +EXTRA_DIST = \ + test_hostlist.py + +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu contribs/python/hostlist/test/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu contribs/python/hostlist/test/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags: TAGS +TAGS: + +ctags: CTAGS +CTAGS: + + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) 
install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-exec-am: + +install-html: install-html-am + +install-info: install-info-am + +install-man: + +install-pdf: install-pdf-am + +install-ps: install-ps-am + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + distclean distclean-generic distclean-libtool distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am uninstall uninstall-am + +# Tell 
versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/contribs/python/hostlist/test/test_hostlist.py b/contribs/python/hostlist/test/test_hostlist.py new file mode 100644 index 000000000..d539742df --- /dev/null +++ b/contribs/python/hostlist/test/test_hostlist.py @@ -0,0 +1,123 @@ +from hostlist import expand_hostlist, collect_hostlist, BadHostlist +import unittest + +class TestExpand1(unittest.TestCase): + + def expand_eq(self, hostlist, expanded_list): + self.assertEqual(expand_hostlist(hostlist), expanded_list) + + def expand_sort_eq(self, hostlist, expanded_list): + self.assertEqual(expand_hostlist(hostlist, sort=True), expanded_list) + + def expand_length(self, hostlist, expanded_length): + self.assertEqual(len(expand_hostlist(hostlist)), expanded_length) + + def expand_bad(self, hostlist): + self.assertRaises(BadHostlist, expand_hostlist, hostlist) + + def test_expand(self): + self.expand_eq("n[9-11]", ["n9", "n10", "n11"]) + self.expand_sort_eq("n[9-11]", ["n9", "n10", "n11"]) + self.expand_eq("n[09-11]", ["n09", "n10", "n11"]) + self.expand_eq("n[009-11]", ["n009", "n010", "n011"]) + self.expand_sort_eq("n[009-11]", ["n009", "n010", "n011"]) + self.expand_eq("n[009-011]", ["n009", "n010", "n011"]) + + self.expand_eq("n[17-17]", ["n17"]) + self.expand_eq("n1,n3", ["n1", "n3"]) + self.expand_sort_eq("n1,n3", ["n1", "n3"]) + self.expand_eq("n3,n1", ["n3", "n1"]) + self.expand_sort_eq("n3,n1", ["n1", "n3"]) + self.expand_eq("n1,n3,n1", ["n1", "n3"]) + self.expand_sort_eq("n1,n3,n1", ["n1", "n3"]) + self.expand_eq("n3,n1,n3", ["n3", "n1"]) + self.expand_sort_eq("n3,n1,n3", ["n1", "n3"]) + + self.expand_eq("n[1],n3", ["n1", "n3"]) + self.expand_eq("n[1,3]", ["n1", "n3"]) + self.expand_eq("n[3,1]", ["n3", "n1"]) + self.expand_sort_eq("n[3,1]", ["n1", "n3"]) + self.expand_eq("n[1,3,1]", ["n1", "n3"]) + + self.expand_eq("n1,n2,n[9-11],n3", ["n1", "n2", "n9", "n10", 
"n11", "n3"]) + + self.expand_eq("n[1-3]m[4-6]", ["n1m4", "n1m5", "n1m6", + "n2m4", "n2m5", "n2m6", + "n3m4", "n3m5", "n3m6"]) + self.expand_eq("n[1-2][4-5]m", ["n14m", "n15m", + "n24m", "n25m"]) + self.expand_eq("[1-2][4-5]", ["14", "15", + "24", "25"]) + + self.expand_length("n[1-100]m[1-100]", 100*100) + self.expand_length("[1-10][1-10][1-10]", 10*10*10) + + self.expand_eq("n[1-5,3-8]", ["n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8"]) + self.expand_eq("n[3-8,1-5]", ["n3", "n4", "n5", "n6", "n7", "n8", "n1", "n2"]) + self.expand_sort_eq("n[3-8,1-5]", ["n1", "n2", "n3", "n4", "n5", "n6", "n7", "n8"]) + + self.expand_eq("", []) + + self.expand_bad("n[]") + self.expand_bad("n[-]") + self.expand_bad("n[1-]") + self.expand_bad("n[-1]") + self.expand_bad("n[1,]") + self.expand_bad("n[,1]") + self.expand_bad("n[1-3,]") + self.expand_bad("n[,1-3]") + self.expand_bad("n[3-1]") + self.expand_bad("n[") + self.expand_bad("n]") + self.expand_bad("n[[]]") + self.expand_bad("n[1,[]]") + self.expand_bad("n[x]") + self.expand_bad("n[1-10x]") + + self.expand_bad("n[1-1000000]") + self.expand_bad("n[1-1000][1-1000]") + + def collect_eq(self, hostlist, expanded_list): + # Note the order of the arguments! This makes it easier to + # copy tests between the expand and collect parts! 
+ self.assertEqual(hostlist, collect_hostlist(expanded_list)) + + def test_collect(self): + self.collect_eq("n[9-11]", ["n9", "n10", "n11"]) + self.collect_eq("n[09-11]", ["n09", "n10", "n11"]) + self.collect_eq("n[009-011]", ["n009", "n010", "n011"]) + + self.collect_eq("n[1-3,9-11]", ["n1", "n2", "n9", "n10", "n11", "n3"]) + + self.collect_eq("m1,n[9-11],p[7-8]", ["n9", "n10", "p7", "m1", "n11", "p8"]) + + self.collect_eq("x[1-2]y[4-5]", ["x1y4", "x1y5", + "x2y4", "x2y5"]) + self.collect_eq("[1-2]y[4-5]z", ["1y4z", "1y5z", + "2y4z", "2y5z"]) + + self.collect_eq("x1y[4-5],x2y4", ["x1y4", "x1y5", "x2y4"]) + self.collect_eq("x1y5,x2y[4-5]", ["x1y5", "x2y4", "x2y5"]) + self.collect_eq("x1y5,x2y4", ["x1y5", "x2y4"]) + + self.collect_eq("", [""]) + + self.collect_eq("n[9,09]", ["n09","n9"]) + self.collect_eq("n[9,09]", ["n9","n09"]) + self.collect_eq("n[9-10]", ["n9","n10"]) + self.collect_eq("n[09-10]", ["n09","n10"]) + self.collect_eq("n[009,10]", ["n009","n10"]) + + self.collect_eq("x", ["x"]) + self.collect_eq("x", ["x", "x"]) + self.collect_eq("x,y", ["x", "y", "x"]) + + self.collect_eq("n1", ["n1"]) + self.collect_eq("n1", ["n1", "n1"]) + self.collect_eq("n[1-2]", ["n1", "n2", "n1"]) + + self.collect_eq("x,y[10-12],z", ["z","y10","y12", "x", "y11"]) + + +if __name__ == '__main__': + unittest.main() diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index 657db1605..5764cb0da 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -128,6 +128,18 @@ The pathname of local domain socket will be needed in the SLURM and SlurmDBD configuration files (slurm.conf and slurmdbd.conf respectively, more details are provided below).</p> +Whether you use any authentication module or not you will need to have +a way for the SlurmDBD to get uid's for users and/or admin. If using +Munge it is ideal for your users to have the same id on all your +clusters. 
If this is the case you should have a combination of every clusters +/etc/passwd file on the database server to allow the DBD to resolve +names for authentication. If using Munge and a users name is not in +the passwd file the action will fail. If not using Munge, you should +add anyone you want to be an administrator or operator to the passwd file. +If they plan on running sacctmgr or any of the accounting tools they +should have the same uid, or they will not authentic correctly. An +LDAP server could also server as a way to gather this information. + <h2>Slurm JobComp Configuration</h2> <p>Presently job completion is not supported with the SlurmDBD, but can be @@ -336,11 +348,17 @@ Server version: 5.0.51a-3ubuntu5.1 (Ubuntu) Type 'help;' or '\h' for help. Type '\c' to clear the buffer. -mysql> grant all on slurm_acct_db.* TO 'da'@'localhost'; +mysql> grant all on slurm_acct_db.* TO 'slurm'@'localhost'; +Query OK, 0 rows affected (0.00 sec) + +or with a password... + +mysql> grant all on slurm_acct_db.* TO 'slurm'@'localhost' + -> identified by 'some_pass' with grant option; Query OK, 0 rows affected (0.00 sec) </pre> -<p>This will grant user 'da' access to do what he needs to do on the +<p>This will grant user 'slurm' access to do what he needs to do on the local host. 
This should be done before the SlurmDBD will work properly.</p> diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in index a10f6f157..574d8a276 100644 --- a/doc/html/configurator.html.in +++ b/doc/html/configurator.html.in @@ -446,10 +446,10 @@ not recommended production use<br> <P> Library used for job step cryptographic signature generation.<BR> Select one value for <B>CryptoType</B>:<BR> -<input type="radio" name="crypto_type" value="munge"><B>Munge</B>: LLNL's +<input type="radio" name="crypto_type" value="munge" checked><B>Munge</B>: LLNL's <A href="http://home.gna.org/munge/">Munge</A> (has Gnu Public License)<BR> -<input type="radio" name="crypto_type" value="openssl" checked> <B>OpenSSL</B>: -<A href="http://www.openssl.org/">OpenSSL</A> (has better performance for signature generation) +<input type="radio" name="crypto_type" value="openssl"> <B>OpenSSL</B>: +<A href="http://www.openssl.org/">OpenSSL</A> <P> Define the location of public and private keys used by SLURM's cryptographic signature generation plugin (CryptoType). diff --git a/doc/html/download.shtml b/doc/html/download.shtml index 5b78cbbab..0b5b03bd1 100644 --- a/doc/html/download.shtml +++ b/doc/html/download.shtml @@ -16,12 +16,13 @@ SLURM has also been packaged for Pluggable Authentication Module (PAM) for restricting access to compute nodes where SLURM performs resource management. Access to the node is restricted to user root and users who have been allocated resources on that node. 
<br> -pam_slurm is available for download from <br> -<a href="https://sourceforge.net/projects/slurm/"> -https://sourceforge.net/projects/slurm/</a><br> or use the -<a href="http://www.debian.org/">Debian</a> package -named <i>libpam-slurm</i>.<br> -The latest stable release is version 1.4.</p> +pam_slurm is available for download from +<a href="https://sourceforge.net/projects/slurm/"> +https://sourceforge.net/projects/slurm/</a><br> +SLURM's PAM module has also been packaged for +<a href="http://packages.debian.org/src:libpam-slurm">Debian</a> and +<a href="http://packages.ubuntu.com/src:libpam-slurm">Ubuntu</a> +(both named <i>libpam-slurm</i>).</p> </ul> <h1>Related Software</h1> @@ -30,18 +31,20 @@ The latest stable release is version 1.4.</p> <li>Authentication plugins identifies the user originating a message.</li> <ul> <li><b>Munge</b><br> -In order to compile the "auth/munge" authentication plugin for SLURM, you will need -to build and install Munge, available from -<a href="http://home.gna.org/munge/">http://home.gna.org/munge/</a>. +In order to compile the "auth/munge" authentication plugin for SLURM, +you will need to build and install Munge, available from +<a href="http://home.gna.org/munge/">http://home.gna.org/munge/</a> or +<a href="http://packages.debian.org/src:munge">Debian</a> or +<a href="http://packages.ubuntu.com/src:munge">Ubuntu</a>. </li> </ul> <li>Databases can be used to store accounting information. 
See our <a href="accounting.html">Accounting</a> web page for more information.</li> <ul> -<li><a href="http://www.clusterresources.com/pages/products/gold-allocation-manager.php">Gold</a></li> -<li><a href="http://www.mysql.com/">MySQL</a></li> +<li><a href="http://www.mysql.com/">MySQL</a> (recommended)</li> <li><a href="http://www.postgresql.org/">PostgreSQL</a></li> +<li><a href="http://www.clusterresources.com/pages/products/gold-allocation-manager.php">Gold</a></li> </ul> <li>Digital signatures (Cypto plugin) are used to insure message are not altered.</li> @@ -51,9 +54,9 @@ OpenSSL is recommended for generation of digital signatures. Download it from <a href="http://www.openssl.org/">http://www.openssl.org/</a>.</li> <li><b>Munge</b><br> Munge can be used at an alternative to OpenSSL. -Munge is available under the Gnu General Public License, but is slower than OpenSSL -for the generation of digital signatures. Munge is available from -<a href="http://home.gna.org/munge/">http://home.gna.org/munge/</a>.</li> +Munge is available under the Gnu General Public License, but is slower than +OpenSSL for the generation of digital signatures. 
See Munge download +information above.</li> </ul> <li>Interconnect plugins (Switch plugin)</li> @@ -101,6 +104,6 @@ Portable Linux Processor Affinity (PLPA)</a></li> </ul> -<p style="text-align:center;">Last modified 27 June 2008</p> +<p style="text-align:center;">Last modified 25 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml index 0f6d6baac..9061ba998 100644 --- a/doc/html/faq.shtml +++ b/doc/html/faq.shtml @@ -23,8 +23,10 @@ name for a batch job?</a></li> allocated to a SLURM job?</a></li> <li><a href="#terminal">Can tasks be launched with a remote terminal?</a></li> <li><a href="#force">What does "srun: Force Terminated job" indicate?</a></li> -<li><a href="#early_exit">What does this mean: "srun: First task exited 30s ago" -followed by "srun Job Failed"?</a></li> +<li><a href="#early_exit">What does this mean: "srun: First task exited +30s ago" followed by "srun Job Failed"?</a></li> +<li><a href="#memlock">Why is my MPI job failing due to the locked memory +(memlock) limit being too low?</a></li> </ol> <h2>For Administrators</h2> <ol> @@ -82,6 +84,7 @@ to log job step information at the appropriate level?</li> SLURM RPM?</li> <li><a href="#slurmdbd">Why should I use the slurmdbd instead of the regular database plugins?</li> +<li><a href="#debug">How can I build SLURM with debugging symbols?</li> </ol> <h2>For Users</h2> @@ -484,9 +487,32 @@ not normally productive. This behavior can be changed using srun's period or disable the timeout altogether. See srun's man page for details. +<p><a name="memlock"><b>18. Why is my MPI job failing due to the +locked memory (memlock) limit being too low?</b></a><br> +By default, SLURM propagates all of your resource limits at the +time of job submission to the spawned tasks. +This can be disabled by specifically excluding the propagation of +specific limits in the <i>slurm.conf</i> file. 
For example +<i>PropagateResourceLimitsExcept=MEMLOCK</i> might be used to +prevent the propagation of a user's locked memory limit from a +login node to a dedicated node used for his parallel job. +If the user's resource limit is not propagated, the limit in +effect for the <i>slurmd</i> daemon will be used for the spawned job. +A simple way to control this is to insure that user <i>root</i> has a +sufficiently large resource limit and insuring that <i>slurmd</i> takes +full advantage of this limit. For example, you can set user's root's +locked memory limit limit to be unlimited on the compute nodes (see +<i>"man limits.conf"</i>) and insuring that <i>slurmd</i> takes +full advantage of this limit (e.g. by adding something like +<i>"ulimit -l unlimited"</i> to the <i>/etc/init.d/slurm</i> +script used to initiate <i>slurmd</i>). +Related information about <a href="#pam">PAM</a> is also available. + <p class="footer"><a href="#top">top</a></p> + <h2>For Administrators</h2> + <p><a name="suspend"><b>1. How is job suspend/resume useful?</b></a><br> Job suspend/resume is most useful to get particularly large jobs initiated in a timely fashion with minimal overhead. Say you want to get a full-system @@ -952,8 +978,14 @@ slurmdbd. slurmdbd you can also query any cluster using the slurmdbd from any other cluster's nodes. +<p><a name="debug"><b>29. How can I build SLURM with debugging symbols?</b></br> +Set your CFLAGS environment variable before building. +You want the "-g" option to produce debugging information and +"-O0" to set the optimization level to zero (off). For example:<br> +CFLAGS="-g -O0" ./configure ... 
+ <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 1 July 2008</p> +<p style="text-align:center;">Last modified 8 August 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index 79ff195ad..f82230ee6 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -5,7 +5,8 @@ <a href="https://www.llnl.gov/">Lawrence Livermore National Laboratory</a> (LLNL), <a href="http://www.hp.com/">HP</a>, <a href="http://www.bull.com/">Bull</a>, -<a href="http://www.lnxi.com/">Linux NetworX</a>. +<a href="http://www.lnxi.com/">Linux NetworX</a>, +and a host of others. <p>The current SLURM development staff includes: </p> <ul> @@ -28,6 +29,7 @@ <li>Chuck Clouston (Bull)</li> <li>Chris Dunlap (LLNL)</li> <li>Joey Ekstrom (LLNL/Bringham Young University)</li> +<li>Kent Engström (National Supercomputer Centre, Sweden)</li> <li>Jim Garlick (LLNL)</li> <li>Didier Gazen (Laboratoire d'Aerologie, France)</li> <li>Mark Grondona (LLNL)</li> @@ -62,6 +64,6 @@ Networking, Italy)</li> <li>Anne-Marie Wunderlin (Bull)</li> </ul> -<p style="text-align:center;">Last modified 20 June 2008</p> +<p style="text-align:center;">Last modified 28 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/testimonials.shtml b/doc/html/testimonials.shtml index 39963ed98..0ee1daef5 100644 --- a/doc/html/testimonials.shtml +++ b/doc/html/testimonials.shtml @@ -28,6 +28,16 @@ Jeffrey M. Squyres, Pervasive Technology Labs at Indiana University </i> <HR SIZE=4> +<i> +"Thank you for SLURM! It is one of the nicest pieces of free software +for managing HPC clusters we have come across in a long time. +Both of our Blue Genes are running SLURM and it works fantastically +well."<br><br> +Adam Todorski, Computational Center for Nanotechnology Inovations, +Rensselaer Polytechnic Institute +</i> +<HR SIZE=4> + <i> We adopted SLURM as our resource manager over two years ago when it was at the 0.3.x release level. 
Since then it has become an integral and important @@ -102,6 +112,6 @@ Bill Celmaster, XC Program Manager, Hewlett-Packard Company </i> <HR SIZE=4> -<p style="text-align:center;">Last modified 17 January 2007</p> +<p style="text-align:center;">Last modified 289 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 01dad39b3..e57ba52c1 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -25,21 +25,11 @@ add, delete, modify, and/or list \fIassociation\fR records then commit the changes and exit. .SH "OPTIONS" -.TP -\fB\-a\fR, \fB\-\-all\fR -Display information about all entities including hidden or deleted ones. -This is equivalent to the \fBall\fR command. - .TP \fB\-h\fR, \fB\-\-help\fR Print a help message describing the usage of \fBssacctmgr\fR. This is equivalent to the \fBhelp\fR command. -.TP -\fB\-\-hide\fR -Do not display information about hidden or deleted entities. -This is equivalent to the \fBhide\fR command. - .TP \fB\-\-immediate\fR commit changes immediately. @@ -55,13 +45,22 @@ This is equivalent to the \fBoneliner\fR command. .TP \fB\-p\fR, \fB\-\-parsable\fR -Output will be '|' delimited. +Output will be '|' delimited with a '|' at the end. + +.TP +\fB\-P\fR, \fB\-\-parsable2\fR +Output will be '|' delimited without a '|' at the end. .TP \fB\-q\fR, \fB\-\-quiet\fR Print no messages other than error messages. This is equivalent to the \fBquiet\fR command. +.TP +\fB\-r\fR, \fB\-\-readonly\fR +Makes it so the running sacctmgr can not modify accounting information. +This is equivalent to the \fBreadonly\fR command. + .TP \fB\-s\fR, \fB\-\-associations\fR Use with show or list to display associations with the entity. @@ -79,10 +78,6 @@ This is equivalent to the \fBversion\fR command. .SH "COMMANDS" -.TP -\fBall\fR -Display information about all entities including hidden or deleted ones. - .TP \fBadd\fR <\fIENTITY\fR> <\fISPECS\fR> Add an entity. 
@@ -110,10 +105,6 @@ Identical to the \fBquit\fR command. \fBhelp\fP Display a description of sacctmgr options and commands. -.TP -\fBhide\fP -Do not display information about hidden or deleted entities. - .TP \fBlist\fR <\fIENTITY\fR> [with <\fISPECS\fR>] Display information about the specified entities. @@ -172,7 +163,7 @@ The hierarchy may have an arbitrary depth. .TP \fIassociation\fP The entity used to group information consisting of four parameters: -\fIaccount\fR, \fIcluster\fR, \fIpartition\fR, and \fIuser\fR. +\fIaccount\fR, \fIcluster\fR, \fIpartition (optional)\fR, and \fIuser\fR. .TP \fIcluster\fP diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index 810dba3c9..b0d91c25d 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -219,10 +219,13 @@ local host). It can be useful to diagnose problems. By default, all elements of the entity type specified are printed. .TP -\fBshutdown\fP -Instruct all Slurm daemons to save current state and terminate. -The Slurm controller (slurmctld) forwards the request all other daemons -(slurmd daemon on each compute node). +\fBshutdown\fP \fIOPTION\fP +Instruct Slurm daemons to save current state and terminate. +By default, the Slurm controller (slurmctld) forwards the request to all +other daemons (slurmd daemon on each compute node). +An \fIOPTION\fP of \fIslurmctld\fP or \fIcontroller\fP results in +only the slurmctld daemon being shutdown and the slurmd daemons +remaining active. .TP \fBsuspend\fP \fIjob_id\fP diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index ac0d0b7d5..e07f8c0cf 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -33,7 +33,11 @@ This is equivalent to the \fBquiet\fR command. .TP \fB\-p\fR, \fB\-\-parsable\fR -Make output '|' delimited. +Output will be '|' delimited with a '|' at the end. + +.TP +\fB\-P\fR, \fB\-\-parsable2\fR +Output will be '|' delimited without a '|' at the end. 
.TP \fB\-v\fR, \fB\-\-verbose\fR diff --git a/doc/man/man3/slurm_get_errno.3 b/doc/man/man3/slurm_get_errno.3 index d80bf817c..51b93cd69 100644 --- a/doc/man/man3/slurm_get_errno.3 +++ b/doc/man/man3/slurm_get_errno.3 @@ -42,7 +42,7 @@ A pointer to a string used as a message header for printing along with an error .br #include <slurm/slurm.h> .br -#include <slurm/slur_errno.h> +#include <slurm/slurm_errno.h> .LP int main (int argc, char *argv[]) .br diff --git a/doc/man/man3/slurm_reconfigure.3 b/doc/man/man3/slurm_reconfigure.3 index ee6b45e26..60c06fd62 100644 --- a/doc/man/man3/slurm_reconfigure.3 +++ b/doc/man/man3/slurm_reconfigure.3 @@ -18,7 +18,7 @@ int \fBslurm_reconfigure\fR ( ); .LP int \fBslurm_shutdown\fR ( .br - uint16_t \fIcore\fP + uint16_t \fIoptions\fP .br ); .LP @@ -48,8 +48,12 @@ int \fBslurm_update_partition\fR ( .SH "ARGUMENTS" .LP .TP -\fIcore\fP -Generate a core file if value is non\-zero. +\fIoptions\fP +0: all slurm daemons are shutdown +.br +1: slurmctld generates a core file +.br +2: only the slurmctld is shutdown (no core file) .TP \fIdelete_part_msg_ptr\fP Specifies the pointer to a partition delete request specification. See slurm.h for diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 04f5b0b9a..41806f1b7 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -201,11 +201,7 @@ job step credentials. The slurmctld daemon must be restarted for a change in \fBCryptoType\fR to take effect. Acceptable values at present include "crypto/munge" and "crypto/openssl". -OpenSSL offers the best performance and is available with an -Apache style open source license. -Munge is a little slower, but is available under the Gnu General Public -License (GPL). -The default value is "crypto/openssl". +The default value is "crypto/munge". .TP \fBDefMemPerCPU\fR @@ -815,10 +811,6 @@ The default value is "sched/builtin". 
Backfill scheduling will initiate lower\-priority jobs if doing so does not delay the expected initiation time of any higher priority job. -Note that this backfill scheduler implementation is relatively -simple. It does not support partitions configured to to share -resources (run multiple jobs on the same nodes) or support -jobs requesting specific nodes. When initially setting the value to "sched/wiki", any pending jobs must have their priority set to zero (held). When changing the value from "sched/wiki", all pending jobs @@ -984,13 +976,15 @@ different shared memory region and lose track of any running jobs. \fBSlurmdTimeout\fR The interval, in seconds, that the SLURM controller waits for \fBslurmd\fR to respond before configuring that node's state to DOWN. -The default value is 300 seconds. A value of zero indicates the node will not be tested by \fBslurmctld\fR to confirm the state of \fBslurmd\fR, the node will not be automatically set to a DOWN state indicating a non\-responsive \fBslurmd\fR, and some other tool will take responsibility for monitoring the state of each compute node and its \fBslurmd\fR daemon. -The value may not exceed 65533. +SLURM's hierarchical communication mechanism is used to ping the \fBslurmd\fR +daemons in order to minimize system noise and overhead. +The default value is 300 seconds. +The value may not exceed 65533 seconds. 
.TP \fBSrunEpilog\fR diff --git a/slurm.spec b/slurm.spec index 69ffeefa3..79c3e3a08 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,4 +1,4 @@ -# $Id: slurm.spec 14616 2008-07-23 22:28:22Z jette $ +# $Id: slurm.spec 14836 2008-08-21 15:58:32Z jette $ # # Note that this package is not relocatable @@ -62,7 +62,7 @@ %slurm_with_opt aix %endif -# Build with sgijob, and mysql plugins on CHAOS systems +# Build with sgijob plugin and mysql (for slurmdbd) on CHAOS systems %if %{?chaos}0 %slurm_with_opt mysql %slurm_with_opt sgijob @@ -71,14 +71,14 @@ %endif Name: slurm -Version: 1.3.6 -Release: 1 +Version: 1.3.7 +Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.3.6.tar.bz2 +Source: slurm-1.3.7.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -87,12 +87,19 @@ Requires: slurm-plugins %ifos linux BuildRequires: python %endif + +%if %{?chaos}0 +BuildRequires: ncurses-devel +%endif + %if %{slurm_with pam} BuildRequires: pam-devel %endif + %if %{slurm_with readline} BuildRequires: readline-devel %endif + %if %{slurm_with openssl} BuildRequires: openssl-devel >= 0.9.6 openssl >= 0.9.6 %endif @@ -249,7 +256,7 @@ SLURM process tracking plugin for SGI job containers. 
############################################################################# %prep -%setup -n slurm-1.3.6 +%setup -n slurm-1.3.7 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index c48aa5d11..f9819f4e8 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -705,6 +705,8 @@ typedef struct task_ext_msg { uint32_t num_tasks; uint32_t *task_id_list; uint32_t return_code; + uint32_t job_id; + uint32_t step_id; } task_exit_msg_t; typedef struct srun_ping_msg { @@ -1902,10 +1904,12 @@ extern int slurm_reconfigure PARAMS(( void )); * slurm_shutdown - issue RPC to have Slurm controller (slurmctld) * cease operations, both the primary and backup controller * are shutdown. - * IN core - controller generates a core file if set + * IN options - 0: all slurm daemons are shutdown + * 1: slurmctld generates a core file + * 2: only the slurmctld is shutdown (no core file) * RET 0 or a slurm error code */ -extern int slurm_shutdown PARAMS(( uint16_t core )); +extern int slurm_shutdown PARAMS(( uint16_t options )); /* * slurm_set_debug_level - issue RPC to set slurm controller debug level diff --git a/src/api/job_info.c b/src/api/job_info.c index f44ed7db9..c94e446ed 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -124,8 +124,7 @@ extern char * slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) { int i, j; - char time_str[32], select_buf[122]; - struct group *group_info = NULL; + char time_str[32], select_buf[122], *group_name, *user_name; char tmp1[128], tmp2[128], *tmp3_ptr; char tmp_line[512]; char *ionodes = NULL; @@ -142,19 +141,16 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) #endif /****** Line 1 ******/ + user_name = uid_to_string((uid_t) job_ptr->user_id); snprintf(tmp_line, sizeof(tmp_line), "JobId=%u UserId=%s(%u) ", - job_ptr->job_id, - uid_to_string((uid_t) job_ptr->user_id), job_ptr->user_id); + job_ptr->job_id, user_name, job_ptr->user_id); + 
xfree(user_name); out = xstrdup(tmp_line); - group_info = getgrgid((gid_t) job_ptr->group_id ); - if ( group_info && group_info->gr_name[ 0 ] ) { - snprintf(tmp_line, sizeof(tmp_line), "GroupId=%s(%u)", - group_info->gr_name, job_ptr->group_id ); - } else { - snprintf(tmp_line, sizeof(tmp_line), "GroupId=(%u)", - job_ptr->group_id ); - } + group_name = gid_to_string((gid_t) job_ptr->group_id); + snprintf(tmp_line, sizeof(tmp_line), "GroupId=%s(%u)", + group_name, job_ptr->group_id); + xfree(group_name); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); diff --git a/src/api/reconfigure.c b/src/api/reconfigure.c index 9ac7fc5db..4e865fd4d 100644 --- a/src/api/reconfigure.c +++ b/src/api/reconfigure.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * reconfigure.c - request that slurmctld shutdown or re-read the * configuration files - * $Id: reconfigure.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: reconfigure.c 14872 2008-08-25 16:25:28Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -109,19 +109,21 @@ slurm_ping (int primary) * slurm_shutdown - issue RPC to have Slurm controller (slurmctld) * cease operations, both the primary and backup controller * are shutdown. 
- * IN core - controller generates a core file if set + * IN options - 0: all slurm daemons are shutdown + * 1: slurmctld generates a core file + * 2: only the slurmctld is shutdown (no core file) * RET 0 or a slurm error code */ int -slurm_shutdown (uint16_t core) +slurm_shutdown (uint16_t options) { slurm_msg_t req_msg; shutdown_msg_t shutdown_msg; slurm_msg_t_init(&req_msg); - shutdown_msg.core = core; - req_msg.msg_type = REQUEST_SHUTDOWN; - req_msg.data = &shutdown_msg; + shutdown_msg.options = options; + req_msg.msg_type = REQUEST_SHUTDOWN; + req_msg.data = &shutdown_msg; /* * Explicity send the message to both primary diff --git a/src/api/signal.c b/src/api/signal.c index 76916aa66..006ed33dc 100644 --- a/src/api/signal.c +++ b/src/api/signal.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * signal.c - Send a signal to a slurm job or job step - * $Id: signal.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: signal.c 14725 2008-08-08 20:42:06Z jette $ ***************************************************************************** * Copyright (C) 2005 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -172,7 +172,7 @@ _local_send_recv_rc_msgs(const char *nodelist, slurm_msg_type_t type, msg->msg_type = type; msg->data = data; - if((ret_list = slurm_send_recv_msgs(nodelist, msg, 0))) { + if((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) { while((ret_data_info = list_pop(ret_list))) { temp_rc = slurm_get_return_code(ret_data_info->type, ret_data_info->data); diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 981fb2da6..2533db78b 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -65,6 +65,8 @@ #include "src/api/step_ctx.h" #include "src/api/pmi_server.h" +#define STEP_ABORT_TIME 2 + extern char **environ; /********************************************************************** @@ -369,15 +371,16 @@ void slurm_step_launch_wait_finish(slurm_step_ctx_t *ctx) sls->abort_action_taken = true; } if (!time_set) { - /* Only set the time once, because we only - * want to wait 10 seconds, no matter how many + /* Only set the time once, because we only want + * to wait STEP_ABORT_TIME, no matter how many * times the condition variable is signalled. */ - ts.tv_sec = time(NULL) + 10; + ts.tv_sec = time(NULL) + STEP_ABORT_TIME; time_set = true; /* FIXME - should this be a callback? 
*/ info("Job step aborted: Waiting up to " - "10 seconds for job step to finish."); + "%d seconds for job step to finish.", + STEP_ABORT_TIME); } errnum = pthread_cond_timedwait(&sls->cond, @@ -510,7 +513,7 @@ void slurm_step_launch_fwd_signal(slurm_step_ctx_t *ctx, int signo) debug3("sending signal to host %s", name); - if (!(ret_list = slurm_send_recv_msgs(name, &req, 0))) { + if (!(ret_list = slurm_send_recv_msgs(name, &req, 0, false))) { error("fwd_signal: slurm_send_recv_msgs really failed bad"); xfree(name); return; @@ -767,6 +770,13 @@ _exit_handler(struct step_launch_state *sls, slurm_msg_t *exit_msg) task_exit_msg_t *msg = (task_exit_msg_t *) exit_msg->data; int i; + if ((msg->job_id != sls->mpi_info->jobid) || + (msg->step_id != sls->mpi_info->stepid)) { + debug("Received MESSAGE_TASK_EXIT from wrong job: %u.%u", + msg->job_id, msg->step_id); + return; + } + /* Record SIGTERM and SIGKILL termination codes to * recognize abnormal termination */ if (WIFSIGNALED(msg->return_code)) { @@ -1032,7 +1042,7 @@ static int _launch_tasks(slurm_step_ctx_t *ctx, if(!(ret_list = slurm_send_recv_msgs( ctx->step_resp->step_layout->node_list, - &msg, timeout))) { + &msg, timeout, false))) { error("slurm_send_recv_msgs failed miserably: %m"); return SLURM_ERROR; } diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 39a034312..7a7396a27 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -40,6 +40,7 @@ #include <sys/types.h> #include <pwd.h> +#include "src/common/uid.h" #include "src/common/xstring.h" #include "src/slurmdbd/read_config.h" @@ -74,11 +75,11 @@ static int _set_assoc_parent_and_user(acct_association_rec_t *assoc) list_iterator_destroy(itr); } if(assoc->user) { - struct passwd *passwd_ptr = getpwnam(assoc->user); - if(passwd_ptr) - assoc->uid = passwd_ptr->pw_uid; + uid_t pw_uid = uid_from_string(assoc->user); + if(pw_uid == (uid_t) -1) + assoc->uid = (uint32_t)NO_VAL; else - assoc->uid = (uint32_t)NO_VAL; + assoc->uid = pw_uid; } 
else { assoc->uid = (uint32_t)NO_VAL; } @@ -191,21 +192,20 @@ static int _get_local_user_list(void *db_conn, int enforce) } } else { acct_user_rec_t *user = NULL; - struct passwd *passwd_ptr = NULL; ListIterator itr = list_iterator_create(local_user_list); //START_TIMER; while((user = list_next(itr))) { - passwd_ptr = getpwnam(user->name); - if(passwd_ptr) - user->uid = passwd_ptr->pw_uid; - else + uid_t pw_uid = uid_from_string(user->name); + if(pw_uid == (uid_t) -1) { + error("couldn't get a uid for user %s", + user->name); user->uid = (uint32_t)NO_VAL; + } else + user->uid = pw_uid; } list_iterator_destroy(itr); //END_TIMER2("load_users"); } - - slurm_mutex_unlock(&local_user_lock); return SLURM_SUCCESS; @@ -658,7 +658,7 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) ListIterator itr = NULL; int rc = SLURM_SUCCESS; - struct passwd *passwd_ptr = NULL; + uid_t pw_uid; if(!local_user_list) return SLURM_SUCCESS; @@ -693,8 +693,8 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) object->qos_list = NULL; } - if(object->admin_level != ACCT_ADMIN_NOTSET) - rec->admin_level = rec->admin_level; + if(object->admin_level != ACCT_ADMIN_NOTSET) + rec->admin_level = object->admin_level; break; case ACCT_ADD_USER: @@ -702,12 +702,13 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) //rc = SLURM_ERROR; break; } - passwd_ptr = getpwnam(object->name); - if(passwd_ptr) - object->uid = passwd_ptr->pw_uid; - else + pw_uid = uid_from_string(object->name); + if(pw_uid == (uid_t) -1) { + error("couldn't get a uid for user %s", + object->name); object->uid = NO_VAL; - + } else + object->uid = pw_uid; list_append(local_user_list, object); break; case ACCT_REMOVE_USER: diff --git a/src/common/env.c b/src/common/env.c index bad0e3821..63f2056c0 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -891,7 +891,6 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, * SLURM_NPROCS * 
SLURM_NODELIST * SLURM_TASKS_PER_NODE - * SLURM_SRUN_COMM_HOST * SLURM_SRUN_COMM_PORT * SLURM_LAUNCH_NODE_IPADDR * @@ -926,8 +925,6 @@ env_array_for_step(char ***dest, env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", step->step_layout->task_cnt); env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); - env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_HOST", - "%s", launcher_hostname); env_array_overwrite_fmt(dest, "SLURM_SRUN_COMM_PORT", "%hu", launcher_port); diff --git a/src/common/pack.h b/src/common/pack.h index 40dad8a80..78531c7b7 100644 --- a/src/common/pack.h +++ b/src/common/pack.h @@ -248,7 +248,7 @@ int unpackmem_array(char *valp, uint32_t size_valp, Buf buffer); #define safe_pack_bit_fmt(bitmap,max_len,buf) do { \ assert(buf->magic == BUF_MAGIC); \ - assert(max_len < 0xffff); \ + assert(max_len < 0xffffffff); \ if (bitmap) { \ char _tmp_str[max_len]; \ uint32_t _size; \ @@ -280,9 +280,9 @@ int unpackmem_array(char *valp, uint32_t size_valp, Buf buffer); if((char *)str != NULL) \ _size = (uint32_t)strlen(str)+1; \ assert(_size == 0 || str != NULL); \ - assert(_size <= 0xffff); \ + assert(_size <= 0xffffffff); \ assert(buf->magic == BUF_MAGIC); \ - packmem(str,(uint16_t)_size,buf); \ + packmem(str,(uint32_t)_size,buf); \ } while (0) #define packnull(buf) do { \ diff --git a/src/common/parse_config.c b/src/common/parse_config.c index cf156a0fc..4957494d7 100644 --- a/src/common/parse_config.c +++ b/src/common/parse_config.c @@ -3,7 +3,7 @@ * * NOTE: when you see the prefix "s_p_", think "slurm parser". * - * $Id: parse_config.c 14064 2008-05-15 23:53:06Z jette $ + * $Id: parse_config.c 14716 2008-08-07 20:11:01Z da $ ***************************************************************************** * Copyright (C) 2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -478,7 +478,8 @@ static int _handle_uint16(s_p_values_t *v, error("%s value (%s) is less than zero", v->key, value); return -1; } else if (num > 0xffff) { - error("%s value (%s) is greater than 65535", v->key, value); + error("%s value (%s) is greater than 65535", v->key, + value); return -1; } v->data = xmalloc(sizeof(uint16_t)); diff --git a/src/common/parse_time.c b/src/common/parse_time.c index e72f2c93e..33c0e7f42 100644 --- a/src/common/parse_time.c +++ b/src/common/parse_time.c @@ -60,6 +60,7 @@ static int _get_delta(char *time_str, int *pos, long *delta) { int offset; long cnt = 0; + int digit = 0; offset = (*pos) + 1; for ( ; ((time_str[offset]!='\0')&&(time_str[offset]!='\n')); offset++) { @@ -87,10 +88,15 @@ static int _get_delta(char *time_str, int *pos, long *delta) } if ((time_str[offset] >= '0') && (time_str[offset] <= '9')) { cnt = (cnt * 10) + (time_str[offset] - '0'); + digit++; continue; } goto prob; } + + if (!digit) /* No numbers after the '=' */ + return -1; + *pos = offset - 1; *delta = cnt; return 0; diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 831c5344a..62cb45ba3 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -1269,9 +1269,14 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) break; case S_JOB_NNODES: p2uint32 = va_arg(vargs, uint32_t *); - if (spank->type == S_TYPE_LOCAL) - *p2uint32 = launcher_job->step_layout->node_cnt; - else + if (spank->type == S_TYPE_LOCAL) { + if (launcher_job->step_layout) + *p2uint32 = launcher_job->step_layout->node_cnt; + else { + *p2uint32 = 0; + rc = ESPANK_ENV_NOEXIST; + } + } else *p2uint32 = slurmd_job->nnodes; break; case S_JOB_NODEID: @@ -1284,9 +1289,14 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) 
break; case S_JOB_TOTAL_TASK_COUNT: p2uint32 = va_arg(vargs, uint32_t *); - if (spank->type == S_TYPE_LOCAL) - *p2uint32 = launcher_job->step_layout->task_cnt; - else + if (spank->type == S_TYPE_LOCAL) { + if (launcher_job->step_layout) + *p2uint32 = launcher_job->step_layout->task_cnt; + else { + *p2uint32 = 0; + rc = ESPANK_ENV_NOEXIST; + } + } else *p2uint32 = slurmd_job->nprocs; break; case S_JOB_NCPUS: @@ -1375,9 +1385,10 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) p2uint32 = va_arg(vargs, uint32_t *); *p2uint32 = (uint32_t) -1; - if (uint32 <= slurmd_job->ntasks) + if ((uint32 <= slurmd_job->ntasks) && + slurmd_job->task && slurmd_job->task[uint32]) { *p2uint32 = slurmd_job->task[uint32]->gtid; - else + } else rc = ESPANK_NOEXIST; break; case S_JOB_GLOBAL_TO_LOCAL_ID: diff --git a/src/common/print_fields.c b/src/common/print_fields.c index cb5d3ed85..e95864f06 100644 --- a/src/common/print_fields.c +++ b/src/common/print_fields.c @@ -67,16 +67,24 @@ extern void print_fields_header(List print_fields_list) { ListIterator itr = NULL; print_field_t *field = NULL; - + int curr_inx = 1; + int field_count = 0; if(!print_fields_list || !print_fields_have_header) return; + field_count = list_count(print_fields_list); + itr = list_iterator_create(print_fields_list); while((field = list_next(itr))) { - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && (curr_inx == field_count)) + printf("%s", field->name); + else if(print_fields_parsable_print) printf("%s|", field->name); else printf("%-*.*s ", field->len, field->len, field->name); + curr_inx++; } list_iterator_reset(itr); printf("\n"); @@ -90,7 +98,7 @@ extern void print_fields_header(List print_fields_list) printf("\n"); } -extern void print_fields_date(print_field_t *field, time_t value) +extern void print_fields_date(print_field_t *field, time_t value, int last) { char temp_char[field->len]; time_t now = value; @@ -98,13 +106,16 @@ 
extern void print_fields_date(print_field_t *field, time_t value) if(!now) now = time(NULL); slurm_make_time_str(&value, (char *)temp_char, field->len); - if(print_fields_parsable_print) + if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", temp_char); + else if(print_fields_parsable_print) printf("%s|", temp_char); else printf("%-*.*s ", field->len, field->len, temp_char); } -extern void print_fields_str(print_field_t *field, char *value) +extern void print_fields_str(print_field_t *field, char *value, int last) { char temp_char[field->len]; char *print_this = NULL; @@ -116,7 +127,10 @@ extern void print_fields_str(print_field_t *field, char *value) print_this = " "; } - if(print_fields_parsable_print) + if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", value); + else if(print_fields_parsable_print) printf("%s|", value); else { if(!print_this) { @@ -130,32 +144,48 @@ extern void print_fields_str(print_field_t *field, char *value) } } -extern void print_fields_uint32(print_field_t *field, uint32_t value) +extern void print_fields_uint32(print_field_t *field, uint32_t value, int last) { /* (value == unset) || (value == cleared) */ if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + ; + else if(print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%u", value); + else if(print_fields_parsable_print) printf("%u|", value); else printf("%*u ", field->len, value); } } -extern void print_fields_uint64(print_field_t *field, uint64_t value) +extern void print_fields_uint64(print_field_t *field, uint64_t value, int last) { /* (value == unset) || (value == cleared) */ if((value == NO_VAL) || (value == INFINITE)) { - 
if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + ; + else if(print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%llu", (long long unsigned) value); + else if(print_fields_parsable_print) printf("%llu|", (long long unsigned) value); else printf("%*llu ", field->len, @@ -163,25 +193,33 @@ extern void print_fields_uint64(print_field_t *field, uint64_t value) } } -extern void print_fields_time(print_field_t *field, uint32_t value) +extern void print_fields_time(print_field_t *field, uint32_t value, int last) { /* (value == unset) || (value == cleared) */ if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + ; + else if(print_fields_parsable_print) printf("|"); else printf("%*s ", field->len, " "); } else { char time_buf[32]; mins2time_str((time_t) value, time_buf, sizeof(time_buf)); - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", time_buf); + else if(print_fields_parsable_print) printf("%s|", time_buf); else printf("%*s ", field->len, time_buf); } } -extern void print_fields_char_list(print_field_t *field, List value) +extern void print_fields_char_list(print_field_t *field, List value, int last) { ListIterator itr = NULL; char *print_this = NULL; @@ -204,7 +242,10 @@ extern void print_fields_char_list(print_field_t *field, List value) list_iterator_destroy(itr); } - if(print_fields_parsable_print) + if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", print_this); + else if(print_fields_parsable_print) printf("%s|", print_this); else { if(strlen(print_this) > field->len) diff --git a/src/common/print_fields.h 
b/src/common/print_fields.h index 37ade380f..36eed4add 100644 --- a/src/common/print_fields.h +++ b/src/common/print_fields.h @@ -74,17 +74,25 @@ typedef struct { uint16_t type; /* defined in the local function */ } print_field_t; +enum { + PRINT_FIELDS_PARSABLE_NOT = 0, + PRINT_FIELDS_PARSABLE_ENDING, + PRINT_FIELDS_PARSABLE_NO_ENDING +}; + extern int print_fields_parsable_print; extern int print_fields_have_header; extern void destroy_print_field(void *object); extern void print_fields_header(List print_fields_list); -extern void print_fields_date(print_field_t *field, time_t value); -extern void print_fields_str(print_field_t *field, char *value); -extern void print_fields_uint32(print_field_t *field, uint32_t value); -extern void print_fields_uint64(print_field_t *field, uint64_t value); -extern void print_fields_time(print_field_t *field, uint32_t value); -extern void print_fields_char_list(print_field_t *field, List value); +extern void print_fields_date(print_field_t *field, time_t value, int last); +extern void print_fields_str(print_field_t *field, char *value, int last); +extern void print_fields_uint32( + print_field_t *field, uint32_t value, int last); +extern void print_fields_uint64( + print_field_t *field, uint64_t value, int last); +extern void print_fields_time(print_field_t *field, uint32_t value, int last); +extern void print_fields_char_list(print_field_t *field, List value, int last); #define print_fields_uint print_fields_uint32 #endif diff --git a/src/common/read_config.c b/src/common/read_config.c index 2faba8f6a..06951e0f5 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -70,6 +70,7 @@ #include "src/common/parse_time.h" #include "src/common/slurm_selecttype_info.h" #include "src/common/util-net.h" +#include "src/common/uid.h" /* Instantiation of the "extern slurm_ctl_conf_t slurmcltd_conf" * found in slurmctld.h */ @@ -548,13 +549,24 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type, /* Only 
"Shared=NO" is valid on XCPU systems */ else if (strcasecmp(tmp, "EXCLUSIVE") == 0) p->max_share = 0; - else if (strncasecmp(tmp, "YES:", 4) == 0) - p->max_share = strtol(&tmp[4], (char **) NULL, 10); - else if (strcasecmp(tmp, "YES") == 0) + else if (strncasecmp(tmp, "YES:", 4) == 0) { + int i = strtol(&tmp[4], (char **) NULL, 10); + if (i <= 1) { + error("Ignoring bad Shared value: %s", + tmp); + p->max_share = 1; /* Shared=NO */ + } else + p->max_share = i; + } else if (strcasecmp(tmp, "YES") == 0) p->max_share = 4; else if (strncasecmp(tmp, "FORCE:", 6) == 0) { - p->max_share = strtol(&tmp[6], (char **) NULL, 10) | - SHARED_FORCE; + int i = strtol(&tmp[6], (char **) NULL, 10); + if (i <= 1) { + error("Ignoring bad Shared value: %s", + tmp); + p->max_share = 1; /* Shared=NO */ + } else + p->max_share = i | SHARED_FORCE; } else if (strcasecmp(tmp, "FORCE") == 0) p->max_share = 4 | SHARED_FORCE; #endif @@ -1152,6 +1164,7 @@ free_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr, bool purge_node_hash) xfree (ctl_conf_ptr->propagate_rlimits); xfree (ctl_conf_ptr->resume_program); xfree (ctl_conf_ptr->slurm_conf); + xfree (ctl_conf_ptr->sched_params); xfree (ctl_conf_ptr->schedtype); xfree (ctl_conf_ptr->select_type); xfree (ctl_conf_ptr->slurm_user_name); @@ -1408,26 +1421,6 @@ slurm_conf_reinit(const char *file_name) return rc; } -/* - * slurm_conf_reinit_nolock - reload the slurm configuration from a file. - * This does the same thing as slurm_conf_reinit, but it performs - * no internal locking. You are responsible for calling slurm_conf_lock() - * before calling this function, and calling slurm_conf_unlock() - * afterwards. - * IN file_name - name of the slurm configuration file to be read - * If file_name is NULL, then this routine tries to use - * the value in the SLURM_CONF env variable. Failing that, - * it uses the compiled-in default file name. 
- * Unlike slurm_conf_init, slurm_conf_reinit will always reread the - * file and reinitialize the configuration structures. - * RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR. - */ -extern int -slurm_conf_reinit_nolock(const char *file_name) -{ - return _internal_reinit(file_name); -} - extern void slurm_conf_mutex_init(void) { @@ -1557,13 +1550,10 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_string(&default_storage_pass, "DefaultStoragePass", hashtbl); s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl); s_p_get_uint32(&default_storage_port, "DefaultStoragePort", hashtbl); - - if (!s_p_get_string(&conf->job_credential_private_key, - "JobCredentialPrivateKey", hashtbl)) - fatal("JobCredentialPrivateKey not set"); - if (!s_p_get_string(&conf->job_credential_public_certificate, - "JobCredentialPublicCertificate", hashtbl)) - fatal("JobCredentialPublicCertificate not set"); + s_p_get_string(&conf->job_credential_private_key, + "JobCredentialPrivateKey", hashtbl); + s_p_get_string(&conf->job_credential_public_certificate, + "JobCredentialPublicCertificate", hashtbl); if (s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl) && conf->max_job_cnt < 1) @@ -1580,6 +1570,13 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->crypto_type, "CryptoType", hashtbl)) conf->crypto_type = xstrdup(DEFAULT_CRYPTO_TYPE); + if ((strcmp(conf->crypto_type, "crypto/openssl") == 0) && + ((conf->job_credential_private_key == NULL) || + (conf->job_credential_public_certificate == NULL))) { + fatal("CryptoType=crypto/openssl requires that both " + "JobCredentialPrivateKey and " + "JobCredentialPublicCertificate be set"); + } if ((s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerCPU", hashtbl)) || (s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl))) @@ -1893,6 +1890,9 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t 
*hashtbl) if (!s_p_get_string(&conf->schedtype, "SchedulerType", hashtbl)) conf->schedtype = xstrdup(DEFAULT_SCHEDTYPE); + else if ((strcmp(conf->schedtype, "sched/gang") == 0) && + (conf->fast_schedule == 0)) + fatal("FastSchedule=0 is not supported with sched/gang"); if (!s_p_get_string(&conf->select_type, "SelectType", hashtbl)) conf->select_type = xstrdup(DEFAULT_SELECT_TYPE); @@ -1918,14 +1918,13 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->slurm_user_name = xstrdup("root"); conf->slurm_user_id = 0; } else { - struct passwd *slurm_passwd; - slurm_passwd = getpwnam(conf->slurm_user_name); - if (slurm_passwd == NULL) { + uid_t my_uid = uid_from_string(conf->slurm_user_name); + if (my_uid == (uid_t) -1) { error ("Invalid user for SlurmUser %s, ignored", conf->slurm_user_name); xfree(conf->slurm_user_name); } else { - conf->slurm_user_id = slurm_passwd->pw_uid; + conf->slurm_user_id = my_uid; } } diff --git a/src/common/read_config.h b/src/common/read_config.h index c64361b4c..5c6e52774 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -190,22 +190,6 @@ extern int slurm_conf_init(const char *file_name); */ extern int slurm_conf_reinit(const char *file_name); -/* - * slurm_conf_reinit_nolock - reload the slurm configuration from a file. - * This does the same thing as slurm_conf_reinit, but it performs - * no internal locking. You are responsible for calling slurm_conf_lock() - * before calling this function, and calling slurm_conf_unlock() - * afterwards. - * IN file_name - name of the slurm configuration file to be read - * If file_name is NULL, then this routine tries to use - * the value in the SLURM_CONF env variable. Failing that, - * it uses the compiled-in default file name. - * Unlike slurm_conf_init, slurm_conf_reinit will always reread the - * file and reinitialize the configuration structures. - * RET SLURM_SUCCESS if conf file is reinitialized, otherwise SLURM_ERROR. 
- */ -extern int slurm_conf_reinit_nolock(const char *file_name); - /* * slurm_conf_mutex_init - init the slurm_conf mutex */ diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index 6265ce45a..568c052f2 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -586,18 +586,18 @@ extern void pack_acct_user_rec(void *in, Buf buffer) { ListIterator itr = NULL; acct_user_rec_t *object = (acct_user_rec_t *)in; - uint32_t count = 0; + uint32_t count = NO_VAL; acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; char *tmp_info = NULL; if(!object) { pack16(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); packnull(buffer); packnull(buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); pack32(0, buffer); return; } @@ -607,27 +607,27 @@ extern void pack_acct_user_rec(void *in, Buf buffer) count = list_count(object->assoc_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->assoc_list); while((assoc = list_next(itr))) { pack_acct_association_rec(assoc, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->coord_accts) count = list_count(object->coord_accts); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->coord_accts); while((coord = list_next(itr))) { pack_acct_coord_rec(coord, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; packstr(object->default_acct, buffer); packstr(object->name, buffer); @@ -637,14 +637,14 @@ extern void pack_acct_user_rec(void *in, Buf buffer) pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack32(object->uid, buffer); } @@ -652,7 +652,7 @@ 
extern int unpack_acct_user_rec(void **object, Buf buffer) { uint32_t uint32_tmp; acct_user_rec_t *object_ptr = xmalloc(sizeof(acct_user_rec_t)); - uint32_t count = 0; + uint32_t count = NO_VAL; acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; int i; @@ -661,7 +661,7 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) *object = object_ptr; safe_unpack16((uint16_t *)&object_ptr->admin_level, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->assoc_list = list_create(destroy_acct_association_rec); for(i=0; i<count; i++) { @@ -672,7 +672,7 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) } } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->coord_accts = list_create(destroy_acct_coord_rec); for(i=0; i<count; i++) { if(unpack_acct_coord_rec((void *)&coord, buffer) @@ -684,7 +684,7 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) safe_unpackstr_xmalloc(&object_ptr->default_acct, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -727,18 +727,18 @@ extern void pack_acct_account_rec(void *in, Buf buffer) { acct_coord_rec_t *coord = NULL; ListIterator itr = NULL; - uint32_t count = 0; + uint32_t count = NO_VAL; acct_account_rec_t *object = (acct_account_rec_t *)in; acct_association_rec_t *assoc = NULL; char *tmp_info = NULL; if(!object) { - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); packnull(buffer); packnull(buffer); packnull(buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); return; } @@ -746,27 +746,27 @@ extern void pack_acct_account_rec(void *in, Buf buffer) count = list_count(object->assoc_list); pack32(count, buffer); - if(count) { + if(count && 
count != NO_VAL) { itr = list_iterator_create(object->assoc_list); while((assoc = list_next(itr))) { pack_acct_association_rec(assoc, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->coordinators) count = list_count(object->coordinators); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->coordinators); while((coord = list_next(itr))) { pack_acct_coord_rec(coord, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; packstr(object->description, buffer); packstr(object->name, buffer); @@ -777,14 +777,14 @@ extern void pack_acct_account_rec(void *in, Buf buffer) pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; } extern int unpack_acct_account_rec(void **object, Buf buffer) @@ -800,7 +800,7 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->assoc_list = list_create(destroy_acct_association_rec); for(i=0; i<count; i++) { @@ -811,7 +811,7 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) } } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->coordinators = list_create(destroy_acct_coord_rec); for(i=0; i<count; i++) { if(unpack_acct_coord_rec((void *)&coord, buffer) @@ -824,7 +824,7 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->organization, &uint32_tmp, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -920,11 +920,11 @@ extern void 
pack_acct_cluster_rec(void *in, Buf buffer) { cluster_accounting_rec_t *acct_info = NULL; ListIterator itr = NULL; - uint32_t count = 0; + uint32_t count = NO_VAL; acct_cluster_rec_t *object = (acct_cluster_rec_t *)in; if(!object) { - pack32(0, buffer); + pack32(NO_VAL, buffer); packnull(buffer); pack32(0, buffer); pack32(0, buffer); @@ -941,14 +941,14 @@ extern void pack_acct_cluster_rec(void *in, Buf buffer) pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((acct_info = list_next(itr))) { pack_cluster_accounting_rec(acct_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; packstr(object->control_host, buffer); pack32(object->control_port, buffer); @@ -972,7 +972,7 @@ extern int unpack_acct_cluster_rec(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->accounting_list = list_create(destroy_cluster_accounting_rec); for(i=0; i<count; i++) { @@ -1036,11 +1036,11 @@ extern void pack_acct_association_rec(void *in, Buf buffer) { acct_accounting_rec_t *acct_info = NULL; ListIterator itr = NULL; - uint32_t count = 0; + uint32_t count = NO_VAL; acct_association_rec_t *object = (acct_association_rec_t *)in; if(!object) { - pack32(0, buffer); + pack32(NO_VAL, buffer); packnull(buffer); packnull(buffer); pack32(0, buffer); @@ -1065,14 +1065,14 @@ extern void pack_acct_association_rec(void *in, Buf buffer) pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->accounting_list); while((acct_info = list_next(itr))) { pack_acct_accounting_rec(acct_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; packstr(object->acct, buffer); packstr(object->cluster, buffer); @@ -1104,7 +1104,7 @@ extern int unpack_acct_association_rec(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count 
!= NO_VAL) { object_ptr->accounting_list = list_create(destroy_acct_accounting_rec); for(i=0; i<count; i++) { @@ -1220,13 +1220,13 @@ extern void pack_acct_user_cond(void *in, Buf buffer) char *tmp_info = NULL; ListIterator itr = NULL; acct_user_cond_t *object = (acct_user_cond_t *)in; - uint32_t count = 0; + uint32_t count = NO_VAL; if(!object) { pack16(0, buffer); pack_acct_association_cond(NULL, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack16(0, buffer); pack16(0, buffer); pack16(0, buffer); @@ -1242,28 +1242,28 @@ extern void pack_acct_user_cond(void *in, Buf buffer) pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->def_acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack16((uint16_t)object->with_assocs, buffer); pack16((uint16_t)object->with_coords, buffer); @@ -1288,7 +1288,7 @@ extern int unpack_acct_user_cond(void **object, Buf buffer) goto unpack_error; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->def_acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1296,7 +1296,7 @@ extern int unpack_acct_user_cond(void **object, Buf buffer) } } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1320,13 +1320,13 @@ extern void pack_acct_account_cond(void *in, Buf buffer) char *tmp_info = NULL; 
ListIterator itr = NULL; acct_account_cond_t *object = (acct_account_cond_t *)in; - uint32_t count = 0; + uint32_t count = NO_VAL; if(!object) { pack_acct_association_cond(NULL, buffer); - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack16(0, buffer); pack16(0, buffer); pack16(0, buffer); @@ -1334,47 +1334,47 @@ extern void pack_acct_account_cond(void *in, Buf buffer) } pack_acct_association_cond(object->assoc_cond, buffer); - count = 0; + count = NO_VAL; if(object->description_list) count = list_count(object->description_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->description_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->organization_list) count = list_count(object->organization_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->organization_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); - count = 0; } + count = NO_VAL; if(object->qos_list) count = list_count(object->qos_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); - count = 0; + count = NO_VAL; } pack16((uint16_t)object->with_assocs, buffer); @@ -1396,7 +1396,7 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) goto unpack_error; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->description_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1404,7 +1404,7 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) } } safe_unpack32(&count, buffer); - 
if(count) { + if(count != NO_VAL) { object_ptr->organization_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1412,7 +1412,7 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) } } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1436,10 +1436,10 @@ extern void pack_acct_cluster_cond(void *in, Buf buffer) char *tmp_info = NULL; ListIterator itr = NULL; acct_cluster_cond_t *object = (acct_cluster_cond_t *)in; - uint32_t count = 0; + uint32_t count = NO_VAL; if(!object) { - pack32(0, buffer); + pack32(NO_VAL, buffer); pack32(0, buffer); pack32(0, buffer); pack16(0, buffer); @@ -1452,7 +1452,7 @@ extern void pack_acct_cluster_cond(void *in, Buf buffer) pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); @@ -1477,7 +1477,7 @@ extern int unpack_acct_cluster_cond(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1501,25 +1501,25 @@ unpack_error: extern void pack_acct_association_cond(void *in, Buf buffer) { char *tmp_info = NULL; - uint32_t count = 0; + uint32_t count = NO_VAL; ListIterator itr = NULL; acct_association_cond_t *object = (acct_association_cond_t *)in; if(!object) { + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack32(0, buffer); + pack32(NO_VAL, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); packnull(buffer); pack32(0, 
buffer); pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); pack16(0, buffer); pack16(0, buffer); pack16(0, buffer); @@ -1531,27 +1531,27 @@ extern void pack_acct_association_cond(void *in, Buf buffer) count = list_count(object->acct_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack32(object->fairshare, buffer); @@ -1559,13 +1559,13 @@ extern void pack_acct_association_cond(void *in, Buf buffer) count = list_count(object->id_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } } - count = 0; + count = NO_VAL; pack32(object->max_cpu_secs_per_job, buffer); pack32(object->max_jobs, buffer); @@ -1576,14 +1576,14 @@ extern void pack_acct_association_cond(void *in, Buf buffer) count = list_count(object->partition_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; packstr(object->parent_acct, buffer); @@ -1594,14 +1594,14 @@ extern void pack_acct_association_cond(void *in, Buf buffer) count = list_count(object->user_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->user_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } 
list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack16((uint16_t)object->with_usage, buffer); pack16((uint16_t)object->with_deleted, buffer); @@ -1620,7 +1620,7 @@ extern int unpack_acct_association_cond(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1628,7 +1628,7 @@ extern int unpack_acct_association_cond(void **object, Buf buffer) } } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1639,7 +1639,7 @@ extern int unpack_acct_association_cond(void **object, Buf buffer) safe_unpack32(&object_ptr->fairshare, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1653,7 +1653,7 @@ extern int unpack_acct_association_cond(void **object, Buf buffer) safe_unpack32(&object_ptr->max_wall_duration_per_job, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1667,7 +1667,7 @@ extern int unpack_acct_association_cond(void **object, Buf buffer) safe_unpack32(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->user_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1691,24 +1691,23 @@ extern void pack_acct_job_cond(void *in, Buf buffer) { char *tmp_info = NULL; jobacct_selected_step_t *job = NULL; - uint32_t count = 0; + uint32_t 
count = NO_VAL; ListIterator itr = NULL; acct_job_cond_t *object = (acct_job_cond_t *)in; if(!object) { - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack16(0, buffer); - pack16(0, buffer); - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack32(0, buffer); pack32(0, buffer); + pack32(NO_VAL, buffer); pack16(0, buffer); return; } @@ -1717,39 +1716,39 @@ extern void pack_acct_job_cond(void *in, Buf buffer) count = list_count(object->acct_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->acct_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->associd_list) count = list_count(object->associd_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->associd_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } } - count = 0; + count = NO_VAL; if(object->cluster_list) count = list_count(object->cluster_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->cluster_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack16(object->duplicates, buffer); @@ -1757,52 +1756,52 @@ extern void pack_acct_job_cond(void *in, Buf buffer) count = list_count(object->groupid_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->groupid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } } - count = 0; + count = NO_VAL; if(object->partition_list) count = list_count(object->partition_list); 
pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->partition_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->step_list) count = list_count(object->step_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->step_list); while((job = list_next(itr))) { pack_jobacct_selected_step(job, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->state_list) count = list_count(object->state_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->state_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack32(object->usage_end, buffer); pack32(object->usage_start, buffer); @@ -1811,14 +1810,14 @@ extern void pack_acct_job_cond(void *in, Buf buffer) count = list_count(object->userid_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->userid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack16(object->without_steps, buffer); } @@ -1834,7 +1833,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->acct_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1843,7 +1842,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->associd_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1852,7 +1851,7 @@ extern int 
unpack_acct_job_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->cluster_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1863,7 +1862,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) safe_unpack16(&object_ptr->duplicates, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->groupid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1872,7 +1871,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->partition_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1882,7 +1881,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->step_list = list_create(destroy_jobacct_selected_step); for(i=0; i<count; i++) { @@ -1892,7 +1891,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->state_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1904,7 +1903,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) safe_unpack32(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->userid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1924,15 +1923,15 @@ unpack_error: extern void pack_acct_qos_cond(void *in, Buf buffer) { - uint32_t count = 0; + uint32_t count = NO_VAL; char *tmp_info = NULL; ListIterator itr = NULL; acct_qos_cond_t *object = (acct_qos_cond_t 
*)in; if(!object) { - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack16(0, buffer); return; } @@ -1941,40 +1940,40 @@ extern void pack_acct_qos_cond(void *in, Buf buffer) count = list_count(object->description_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->description_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->name_list) count = list_count(object->name_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->name_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack16(object->with_deleted, buffer); } @@ -1990,7 +1989,7 @@ extern int unpack_acct_qos_cond(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->description_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -1999,7 +1998,7 @@ extern int unpack_acct_qos_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -2008,7 +2007,7 @@ extern int unpack_acct_qos_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->name_list = 
list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -2027,15 +2026,15 @@ unpack_error: extern void pack_acct_txn_cond(void *in, Buf buffer) { - uint32_t count = 0; + uint32_t count = NO_VAL; char *tmp_info = NULL; ListIterator itr = NULL; acct_txn_cond_t *object = (acct_txn_cond_t *)in; if(!object) { - pack32(0, buffer); - pack32(0, buffer); - pack32(0, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); + pack32(NO_VAL, buffer); pack32(0, buffer); pack32(0, buffer); return; @@ -2044,40 +2043,40 @@ extern void pack_acct_txn_cond(void *in, Buf buffer) count = list_count(object->action_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->action_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->actor_list) count = list_count(object->actor_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->actor_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; if(object->id_list) count = list_count(object->id_list); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->id_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } - count = 0; + count = NO_VAL; pack32(object->time_end, buffer); pack32(object->time_start, buffer); @@ -2094,7 +2093,7 @@ extern int unpack_acct_txn_cond(void **object, Buf buffer) *object = object_ptr; safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->action_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -2103,7 +2102,7 @@ extern int unpack_acct_txn_cond(void **object, Buf buffer) } 
safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->actor_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -2112,7 +2111,7 @@ extern int unpack_acct_txn_cond(void **object, Buf buffer) } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->id_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); @@ -2133,7 +2132,7 @@ unpack_error: extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer) { - uint32_t count = 0; + uint32_t count = NO_VAL; ListIterator itr = NULL; void *acct_object = NULL; void (*my_function) (void *object, Buf buffer); @@ -2165,7 +2164,7 @@ extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer) count = list_count(object->objects); pack32(count, buffer); - if(count) { + if(count && count != NO_VAL) { itr = list_iterator_create(object->objects); while((acct_object = list_next(itr))) { (*(my_function))(acct_object, buffer); @@ -2214,7 +2213,7 @@ extern int unpack_acct_update_object(acct_update_object_t **object, Buf buffer) goto unpack_error; } safe_unpack32(&count, buffer); - if(count) { + if(count != NO_VAL) { object_ptr->objects = list_create((*(my_destroy))); for(i=0; i<count; i++) { if(((*(my_function))(&acct_object, buffer)) @@ -2322,39 +2321,39 @@ extern acct_admin_level_t str_2_acct_admin_level(char *level) extern void log_assoc_rec(acct_association_rec_t *assoc_ptr) { - debug("association rec id : %u", assoc_ptr->id); - debug(" acct : %s", assoc_ptr->acct); - debug(" cluster : %s", assoc_ptr->cluster); + debug2("association rec id : %u", assoc_ptr->id); + debug2(" acct : %s", assoc_ptr->acct); + debug2(" cluster : %s", assoc_ptr->cluster); if(assoc_ptr->fairshare == INFINITE) - debug(" fairshare : NONE"); + debug2(" fairshare : NONE"); else - debug(" fairshare : %u", - assoc_ptr->fairshare); + debug2(" 
fairshare : %u", + assoc_ptr->fairshare); if(assoc_ptr->max_cpu_secs_per_job == INFINITE) - debug(" max_cpu_secs_per_job : NONE"); + debug2(" max_cpu_secs_per_job : NONE"); else - debug(" max_cpu_secs_per_job : %d", - assoc_ptr->max_cpu_secs_per_job); + debug2(" max_cpu_secs_per_job : %d", + assoc_ptr->max_cpu_secs_per_job); if(assoc_ptr->max_jobs == INFINITE) - debug(" max_jobs : NONE"); + debug2(" max_jobs : NONE"); else - debug(" max_jobs : %u", assoc_ptr->max_jobs); + debug2(" max_jobs : %u", assoc_ptr->max_jobs); if(assoc_ptr->max_nodes_per_job == INFINITE) - debug(" max_nodes_per_job : NONE"); + debug2(" max_nodes_per_job : NONE"); else - debug(" max_nodes_per_job : %d", - assoc_ptr->max_nodes_per_job); + debug2(" max_nodes_per_job : %d", + assoc_ptr->max_nodes_per_job); if(assoc_ptr->max_wall_duration_per_job == INFINITE) - debug(" max_wall_duration_per_job : NONE"); + debug2(" max_wall_duration_per_job : NONE"); else - debug(" max_wall_duration_per_job : %d", - assoc_ptr->max_wall_duration_per_job); - debug(" parent_acct : %s", assoc_ptr->parent_acct); - debug(" partition : %s", assoc_ptr->partition); - debug(" user : %s(%u)", - assoc_ptr->user, assoc_ptr->uid); - debug(" used_jobs : %u", assoc_ptr->used_jobs); - debug(" used_share : %u", assoc_ptr->used_share); + debug2(" max_wall_duration_per_job : %d", + assoc_ptr->max_wall_duration_per_job); + debug2(" parent_acct : %s", assoc_ptr->parent_acct); + debug2(" partition : %s", assoc_ptr->partition); + debug2(" user : %s(%u)", + assoc_ptr->user, assoc_ptr->uid); + debug2(" used_jobs : %u", assoc_ptr->used_jobs); + debug2(" used_share : %u", assoc_ptr->used_share); } /* diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index f0b7bb3a7..d0a79d1b1 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -222,6 +222,11 @@ typedef struct { uint16_t with_deleted; } acct_user_cond_t; +/* If there is something that can be altered here 
it will need to + * added as something to check for when modifying a user since a user + * can modify there default account but nothing else in + * src/slurmdbd/proc_req.c. + */ typedef struct { acct_admin_level_t admin_level; List assoc_list; /* list of acct_association_rec_t *'s */ diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index adc3a68b6..57fd2925f 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/slurm_cred.c - SLURM job credential functions - * $Id: slurm_cred.c 14499 2008-07-11 22:54:48Z jette $ + * $Id: slurm_cred.c 14884 2008-08-25 21:39:19Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -418,7 +418,6 @@ slurm_cred_creator_ctx_create(const char *path) { slurm_cred_ctx_t ctx = NULL; - xassert(path != NULL); if (_slurm_crypto_init() < 0) return NULL; @@ -447,7 +446,6 @@ slurm_cred_verifier_ctx_create(const char *path) { slurm_cred_ctx_t ctx = NULL; - xassert(path != NULL); if (_slurm_crypto_init() < 0) return NULL; diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 139b8e444..6000ad0c3 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -593,6 +593,24 @@ char *slurm_get_accounting_storage_loc(void) return storage_loc; } +/* slurm_get_accounting_storage_enforce + * returns whether or not to enforce associations + */ +int slurm_get_accounting_storage_enforce(void) +{ + int enforce = 0; + slurm_ctl_conf_t *conf; + + if(slurmdbd_conf) { + } else { + conf = slurm_conf_lock(); + enforce = conf->accounting_storage_enforce; + slurm_conf_unlock(); + } + return enforce; + +} + /* slurm_set_accounting_storage_loc * IN: char *loc (name of file or database) * RET 0 or error code @@ -2549,12 
+2567,13 @@ int slurm_send_only_node_msg(slurm_msg_t *req) * IN nodelist - list of nodes to send to. * IN msg - a slurm_msg struct to be sent by the function * IN timeout - how long to wait in milliseconds + * IN quiet - if set, reduce logging details * RET List - List containing the responses of the childern * (if any) we forwarded the message to. List * containing type (ret_data_info_t). */ List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, - int timeout) + int timeout, bool quiet) { List ret_list = NULL; List tmp_ret_list = NULL; @@ -2572,7 +2591,7 @@ List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, #ifdef HAVE_FRONT_END /* only send to the front end node */ name = nodelist_nth_host(nodelist, 0); - if(!name) { + if (!name) { error("slurm_send_recv_msgs: " "can't get the first name out of %s", nodelist); @@ -2587,19 +2606,26 @@ List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, while((name = hostlist_shift(hl))) { if(slurm_conf_get_addr(name, &msg->address) == SLURM_ERROR) { - error("slurm_send_recv_msgs: can't get addr for " - "host %s", name); + if (quiet) { + debug("slurm_send_recv_msgs: can't get addr " + "for host %s", name); + } else { + error("slurm_send_recv_msgs: can't get addr " + "for host %s", name); + } mark_as_failed_forward(&tmp_ret_list, name, - SLURM_COMMUNICATIONS_CONNECTION_ERROR); + SLURM_COMMUNICATIONS_CONNECTION_ERROR); free(name); continue; } if ((fd = slurm_open_msg_conn(&msg->address)) < 0) { - error("slurm_send_recv_msgs to %s: %m", name); - + if (quiet) + debug("slurm_send_recv_msgs to %s: %m", name); + else + error("slurm_send_recv_msgs to %s: %m", name); mark_as_failed_forward(&tmp_ret_list, name, - SLURM_COMMUNICATIONS_CONNECTION_ERROR); + SLURM_COMMUNICATIONS_CONNECTION_ERROR); free(name); continue; } @@ -2617,8 +2643,15 @@ List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, if(!(ret_list = _send_and_recv_msgs(fd, msg, timeout))) { xfree(msg->forward.nodelist); - 
error("slurm_send_recv_msgs(_send_and_recv_msgs) " - "to %s: %m", name); + if (quiet) { + debug("slurm_send_recv_msgs" + "(_send_and_recv_msgs) to %s: %m", + name); + } else { + error("slurm_send_recv_msgs" + "(_send_and_recv_msgs) to %s: %m", + name); + } mark_as_failed_forward(&tmp_ret_list, name, errno); free(name); continue; diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index 8c2fbabef..c293ab2ce 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -245,6 +245,11 @@ char *slurm_get_accounting_storage_host(void); */ char *slurm_get_accounting_storage_loc(void); +/* slurm_get_accounting_storage_enforce + * returns whether or not to enforce associations + */ +int slurm_get_accounting_storage_enforce(void); + /* slurm_set_accounting_storage_loc * IN: char *loc (name of file or database) * RET 0 or error code @@ -778,11 +783,13 @@ int slurm_send_recv_node_msg(slurm_msg_t * request_msg, * IN nodelist - list of nodes to send to. * IN msg - a slurm_msg struct to be sent by the function * IN timeout - how long to wait in milliseconds + * IN quiet - if set, reduce logging details * RET List - List containing the responses of the childern * (if any) we forwarded the message to. List * containing type (ret_types_t). 
*/ -List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, int timeout); +List slurm_send_recv_msgs(const char *nodelist, slurm_msg_t *msg, int timeout, + bool quiet); /* * Send a message to msg->address diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index e7674c090..a5ac871f7 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -1398,6 +1398,7 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data) case REQUEST_SIGNAL_JOB: slurm_free_signal_job_msg(data); break; + case REQUEST_ABORT_JOB: case REQUEST_TERMINATE_JOB: slurm_free_kill_job_msg(data); break; diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index d6ed3e7ed..587f5d2cf 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -184,7 +184,8 @@ typedef enum { REQUEST_SIGNAL_JOB, REQUEST_TERMINATE_JOB, MESSAGE_EPILOG_COMPLETE, - DEFUNCT_REQUEST_SPAWN_TASK, /* DEFUNCT */ + REQUEST_ABORT_JOB, /* job shouldn't be running, kill it without + * job/step/task complete responses */ REQUEST_FILE_BCAST, TASK_USER_MANAGED_IO_STREAM, @@ -387,7 +388,7 @@ typedef struct epilog_complete_msg { } epilog_complete_msg_t; typedef struct shutdown_msg { - uint16_t core; + uint16_t options; } shutdown_msg_t; typedef struct last_update_msg { @@ -516,8 +517,10 @@ typedef struct return_code_msg { * the event of some launch failure or race condition preventing slurmd * from getting the MPIRUN_PARTITION at that time. It is needed for * the job epilog. 
*/ +#define SIG_TIME_LIMIT 996 /* Dummy signal value i time limit reached */ +#define SIG_ABORT 997 /* Dummy signal value to abort a job */ #define SIG_NODE_FAIL 998 /* Dummy signal value to signify node failure */ -#define SIG_FAILURE 999 /* Dummy signal value to signify sys failure */ +#define SIG_FAILURE 999 /* Dummy signal value to signify sys failure */ typedef struct kill_job_msg { uint32_t job_id; uint16_t job_state; diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 9be1aabca..c7ef3665c 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -590,6 +590,7 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) case REQUEST_SIGNAL_JOB: _pack_signal_job_msg((signal_job_msg_t *) msg->data, buffer); break; + case REQUEST_ABORT_JOB: case REQUEST_KILL_TIMELIMIT: case REQUEST_TERMINATE_JOB: _pack_kill_job_msg((kill_job_msg_t *) msg->data, buffer); @@ -928,6 +929,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) rc = _unpack_signal_job_msg((signal_job_msg_t **)&(msg->data), buffer); break; + case REQUEST_ABORT_JOB: case REQUEST_KILL_TIMELIMIT: case REQUEST_TERMINATE_JOB: rc = _unpack_kill_job_msg((kill_job_msg_t **) & (msg->data), @@ -3032,10 +3034,12 @@ static void _pack_task_exit_msg(task_exit_msg_t * msg, Buf buffer) { xassert(msg != NULL); - pack32((uint32_t)msg->return_code, buffer); - pack32((uint32_t)msg->num_tasks, buffer); + pack32(msg->return_code, buffer); + pack32(msg->num_tasks, buffer); pack32_array(msg->task_id_list, msg->num_tasks, buffer); + pack32(msg->job_id, buffer); + pack32(msg->step_id, buffer); } static int @@ -3053,6 +3057,8 @@ _unpack_task_exit_msg(task_exit_msg_t ** msg_ptr, Buf buffer) safe_unpack32_array(&msg->task_id_list, &uint32_tmp, buffer); if (msg->num_tasks != uint32_tmp) goto unpack_error; + safe_unpack32(&msg->job_id, buffer); + safe_unpack32(&msg->step_id, buffer); return SLURM_SUCCESS; unpack_error: @@ -3365,7 +3371,7 @@ unpack_error: static void 
_pack_shutdown_msg(shutdown_msg_t * msg, Buf buffer) { - pack16((uint16_t)msg->core, buffer); + pack16((uint16_t)msg->options, buffer); } static int @@ -3376,7 +3382,7 @@ _unpack_shutdown_msg(shutdown_msg_t ** msg_ptr, Buf buffer) msg = xmalloc(sizeof(shutdown_msg_t)); *msg_ptr = msg; - safe_unpack16(&msg->core, buffer); + safe_unpack16(&msg->options, buffer); return SLURM_SUCCESS; unpack_error: diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 25818bcf5..c527f6f20 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -71,9 +71,10 @@ #include "src/common/xsignal.h" #include "src/common/xstring.h" -#define DBD_MAGIC 0xDEAD3219 -#define MAX_AGENT_QUEUE 10000 -#define MAX_DBD_MSG_LEN 16384 +#define DBD_MAGIC 0xDEAD3219 +#define MAX_AGENT_QUEUE 10000 +#define MAX_DBD_MSG_LEN 16384 +#define SLURMDBD_TIMEOUT 60 /* Seconds SlurmDBD for response */ static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER; @@ -90,14 +91,14 @@ static void * _agent(void *x); static void _agent_queue_del(void *x); static void _close_slurmdbd_fd(void); static void _create_agent(void); -static bool _fd_readable(slurm_fd fd); +static bool _fd_readable(slurm_fd fd, int read_timeout); static int _fd_writeable(slurm_fd fd); -static int _get_return_code(void); +static int _get_return_code(int read_timeout); static Buf _load_dbd_rec(int fd); static void _load_dbd_state(void); static void _open_slurmdbd_fd(void); static int _purge_job_start_req(void); -static Buf _recv_msg(void); +static Buf _recv_msg(int read_timeout); static void _reopen_slurmdbd_fd(void); static int _save_dbd_rec(int fd, Buf buffer); static void _save_dbd_state(void); @@ -200,12 +201,13 @@ extern int slurm_send_slurmdbd_recv_rc_msg(slurmdbd_msg_t *req, int *resp_code) extern int slurm_send_recv_slurmdbd_msg(slurmdbd_msg_t *req, slurmdbd_msg_t *resp) { - int rc = SLURM_SUCCESS; + int rc = SLURM_SUCCESS, read_timeout; Buf 
buffer; xassert(req); xassert(resp); + read_timeout = SLURMDBD_TIMEOUT * 1000; slurm_mutex_lock(&slurmdbd_lock); if (slurmdbd_fd < 0) { /* Either slurm_open_slurmdbd_conn() was not executed or @@ -227,7 +229,7 @@ extern int slurm_send_recv_slurmdbd_msg(slurmdbd_msg_t *req, return SLURM_ERROR; } - buffer = _recv_msg(); + buffer = _recv_msg(read_timeout); if (buffer == NULL) { error("slurmdbd: Getting response to message type %u", req->msg_type); @@ -288,7 +290,7 @@ extern int slurm_send_slurmdbd_msg(slurmdbd_msg_t *req) } /* Open a connection to the Slurm DBD and set slurmdbd_fd */ -static void _open_slurmdbd_fd() +static void _open_slurmdbd_fd(void) { slurm_addr dbd_addr; uint16_t slurmdbd_port; @@ -301,13 +303,12 @@ static void _open_slurmdbd_fd() slurmdbd_host = slurm_get_accounting_storage_host(); slurmdbd_port = slurm_get_accounting_storage_port(); - if ((slurmdbd_host == NULL) || (slurmdbd_port == 0)) { - error("Invalid SlurmDbd address %s:%u", - slurmdbd_host, slurmdbd_port); - xfree(slurmdbd_host); - return; - } - + if (slurmdbd_host == NULL) + slurmdbd_host = xstrdup(DEFAULT_STORAGE_HOST); + + if (slurmdbd_port == 0) + slurmdbd_port = SLURMDBD_PORT; + slurm_set_addr(&dbd_addr, slurmdbd_port, slurmdbd_host); if (dbd_addr.sin_port == 0) error("Unable to locate SlurmDBD host %s:%u", @@ -441,9 +442,10 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) req->data, buffer); break; default: - error("slurmdbd: Invalid message type pack %u(%s)", + error("slurmdbd: Invalid message type pack %u(%s:%u)", req->msg_type, - slurmdbd_msg_type_2_str(req->msg_type)); + slurmdbd_msg_type_2_str(req->msg_type, 1), + req->msg_type); free_buf(buffer); return NULL; } @@ -571,7 +573,8 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) default: error("slurmdbd: Invalid message type unpack %u(%s)", resp->msg_type, - slurmdbd_msg_type_2_str(resp->msg_type)); + slurmdbd_msg_type_2_str(resp->msg_type, 1), + resp->msg_type); return SLURM_ERROR; } return rc; @@ -693,161 
+696,314 @@ extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type) return NO_VAL; } -extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type) +extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type, int get_enum) { switch(msg_type) { case DBD_INIT: - return "Init"; + if(get_enum) { + return "DBD_INIT"; + } else + return "Init"; break; case DBD_FINI: - return "Fini"; + if(get_enum) { + return "DBD_FINI"; + } else + return "Fini"; break; case DBD_ADD_ACCOUNTS: - return "Add Accounts"; + if(get_enum) { + return "DBD_ADD_ACCOUNTS"; + } else + return "Add Accounts"; break; case DBD_ADD_ACCOUNT_COORDS: - return "Add Account Coord"; + if(get_enum) { + return "DBD_ADD_ACCOUNT_COORDS"; + } else + return "Add Account Coord"; break; case DBD_ADD_ASSOCS: - return "Add Associations"; + if(get_enum) { + return "DBD_ADD_ASSOCS"; + } else + return "Add Associations"; break; case DBD_ADD_CLUSTERS: - return "Add Clusters"; + if(get_enum) { + return "DBD_ADD_CLUSTERS"; + } else + return "Add Clusters"; break; case DBD_ADD_USERS: - return "Add Users"; + if(get_enum) { + return "DBD_ADD_USERS"; + } else + return "Add Users"; break; case DBD_CLUSTER_PROCS: - return "Cluster Processors"; + if(get_enum) { + return "DBD_CLUSTER_PROCS"; + } else + return "Cluster Processors"; break; case DBD_FLUSH_JOBS: - return "Flush Jobs"; + if(get_enum) { + return "DBD_FLUSH_JOBS"; + } else + return "Flush Jobs"; break; case DBD_GET_ACCOUNTS: - return "Get Accounts"; + if(get_enum) { + return "DBD_GET_ACCOUNTS"; + } else + return "Get Accounts"; break; case DBD_GET_ASSOCS: - return "Get Associations"; + if(get_enum) { + return "DBD_GET_ASSOCS"; + } else + return "Get Associations"; break; case DBD_GET_ASSOC_USAGE: - return "Get Association Usage"; + if(get_enum) { + return "DBD_GET_ASSOC_USAGE"; + } else + return "Get Association Usage"; break; case DBD_GET_CLUSTERS: - return "Get Clusters"; + if(get_enum) { + return "DBD_GET_CLUSTERS"; + } else + return "Get Clusters"; 
break; case DBD_GET_CLUSTER_USAGE: - return "Get Cluster Usage"; + if(get_enum) { + return "DBD_GET_CLUSTER_USAGE"; + } else + return "Get Cluster Usage"; break; case DBD_GET_JOBS: - return "Get Jobs"; + if(get_enum) { + return "DBD_GET_JOBS"; + } else + return "Get Jobs"; break; case DBD_GET_USERS: - return "Get Users"; + if(get_enum) { + return "DBD_GET_USERS"; + } else + return "Get Users"; break; case DBD_GOT_ACCOUNTS: - return "Got Accounts"; + if(get_enum) { + return "DBD_GOT_ACCOUNTS"; + } else + return "Got Accounts"; break; case DBD_GOT_ASSOCS: - return "Got Associations"; + if(get_enum) { + return "DBD_GOT_ASSOCS"; + } else + return "Got Associations"; break; case DBD_GOT_ASSOC_USAGE: - return "Got Association Usage"; + if(get_enum) { + return "DBD_GOT_ASSOC_USAGE"; + } else + return "Got Association Usage"; break; case DBD_GOT_CLUSTERS: - return "Got Clusters"; + if(get_enum) { + return "DBD_GOT_CLUSTERS"; + } else + return "Got Clusters"; break; case DBD_GOT_CLUSTER_USAGE: - return "Got Cluster Usage"; + if(get_enum) { + return "DBD_GOT_CLUSTER_USAGE"; + } else + return "Got Cluster Usage"; break; case DBD_GOT_JOBS: - return "Got Jobs"; + if(get_enum) { + return "DBD_GOT_JOBS"; + } else + return "Got Jobs"; break; case DBD_GOT_LIST: - return "Got List"; + if(get_enum) { + return "DBD_GOT_LIST"; + } else + return "Got List"; break; case DBD_GOT_USERS: - return "Got Users"; + if(get_enum) { + return "DBD_GOT_USERS"; + } else + return "Got Users"; break; case DBD_JOB_COMPLETE: - return "Job Complete"; + if(get_enum) { + return "DBD_JOB_COMPLETE"; + } else + return "Job Complete"; break; case DBD_JOB_START: - return "Job Start"; + if(get_enum) { + return "DBD_JOB_START"; + } else + return "Job Start"; break; case DBD_JOB_START_RC: - return "Job Start RC"; + if(get_enum) { + return "DBD_JOB_START_RC"; + } else + return "Job Start RC"; break; case DBD_JOB_SUSPEND: - return "Job Suspend"; + if(get_enum) { + return "DBD_JOB_SUSPEND"; + } else + return "Job 
Suspend"; break; case DBD_MODIFY_ACCOUNTS: - return "Modify Accounts"; + if(get_enum) { + return "DBD_MODIFY_ACCOUNTS"; + } else + return "Modify Accounts"; break; case DBD_MODIFY_ASSOCS: - return "Modify Associations"; + if(get_enum) { + return "DBD_MODIFY_ASSOCS"; + } else + return "Modify Associations"; break; case DBD_MODIFY_CLUSTERS: - return "Modify Clusters"; + if(get_enum) { + return "DBD_MODIFY_CLUSTERS"; + } else + return "Modify Clusters"; break; case DBD_MODIFY_USERS: - return "Modify Users"; + if(get_enum) { + return "DBD_MODIFY_USERS"; + } else + return "Modify Users"; break; case DBD_NODE_STATE: - return "Node State"; + if(get_enum) { + return "DBD_NODE_STATE"; + } else + return "Node State"; break; case DBD_RC: - return "RC"; + if(get_enum) { + return "DBD_RC"; + } else + return "Return Code"; break; case DBD_REGISTER_CTLD: - return "Register Cluster"; + if(get_enum) { + return "DBD_REGISTER_CTLD"; + } else + return "Register Cluster"; break; case DBD_REMOVE_ACCOUNTS: - return "Remove Accounts"; + if(get_enum) { + return "DBD_REMOVE_ACCOUNTS"; + } else + return "Remove Accounts"; break; case DBD_REMOVE_ACCOUNT_COORDS: - return "Remove Account Coords"; + if(get_enum) { + return "DBD_REMOVE_ACCOUNT_COORDS"; + } else + return "Remove Account Coords"; break; case DBD_REMOVE_ASSOCS: - return "Remove Associations"; + if(get_enum) { + return "DBD_REMOVE_ASSOCS"; + } else + return "Remove Associations"; break; case DBD_REMOVE_CLUSTERS: - return "Remove Clusters"; + if(get_enum) { + return "DBD_REMOVE_CLUSTERS"; + } else + return "Remove Clusters"; break; case DBD_REMOVE_USERS: - return "Remove Users"; + if(get_enum) { + return "DBD_REMOVE_USERS"; + } else + return "Remove Users"; break; case DBD_ROLL_USAGE: - return "Roll Usage"; + if(get_enum) { + return "DBD_ROLL_USAGE"; + } else + return "Roll Usage"; break; case DBD_STEP_COMPLETE: - return "Step Complete"; + if(get_enum) { + return "DBD_STEP_COMPLETE"; + } else + return "Step Complete"; break; case 
DBD_STEP_START: - return "Step Start"; + if(get_enum) { + return "DBD_STEP_START"; + } else + return "Step Start"; break; case DBD_UPDATE_SHARES_USED: - return "Update Shares Used"; + if(get_enum) { + return "DBD_UPDATE_SHARES_USED"; + } else + return "Update Shares Used"; break; case DBD_GET_JOBS_COND: - return "Get Jobs Conditional"; + if(get_enum) { + return "DBD_GET_JOBS_COND"; + } else + return "Get Jobs Conditional"; break; case DBD_GET_TXN: - return "Get Transations"; + if(get_enum) { + return "DBD_GET_TXN"; + } else + return "Get Transations"; break; case DBD_GOT_TXN: - return "Got Transations"; + if(get_enum) { + return "DBD_GOT_TXN"; + } else + return "Got Transations"; break; case DBD_ADD_QOS: - return "Add QOS"; + if(get_enum) { + return "DBD_ADD_QOS"; + } else + return "Add QOS"; break; case DBD_GET_QOS: - return "Get QOS"; + if(get_enum) { + return "DBD_GET_QOS"; + } else + return "Get QOS"; break; case DBD_GOT_QOS: - return "Got QOS"; + if(get_enum) { + return "DBD_GOT_QOS"; + } else + return "Got QOS"; break; case DBD_REMOVE_QOS: - return "Remove QOS"; + if(get_enum) { + return "DBD_REMOVE_QOS"; + } else + return "Remove QOS"; break; default: return "Unknown"; @@ -859,7 +1015,7 @@ extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type) static int _send_init_msg(void) { - int rc; + int rc, read_timeout; Buf buffer; dbd_init_msg_t req; @@ -876,7 +1032,8 @@ static int _send_init_msg(void) return rc; } - rc = _get_return_code(); + read_timeout = slurm_get_msg_timeout() * 1000; + rc = _get_return_code(read_timeout); return rc; } @@ -958,7 +1115,7 @@ static int _send_msg(Buf buffer) return SLURM_SUCCESS; } -static int _get_return_code(void) +static int _get_return_code(int read_timeout) { Buf buffer; uint16_t msg_type; @@ -966,7 +1123,7 @@ static int _get_return_code(void) dbd_job_start_rc_msg_t *js_msg; int rc = SLURM_ERROR; - buffer = _recv_msg(); + buffer = _recv_msg(read_timeout); if (buffer == NULL) return rc; @@ -985,9 +1142,20 @@ static 
int _get_return_code(void) case DBD_RC: if (slurmdbd_unpack_rc_msg(&msg, buffer) == SLURM_SUCCESS) { rc = msg->return_code; - if (rc != SLURM_SUCCESS) - error("slurmdbd: DBD_RC is %d from %u: %s", - rc, msg->sent_type, msg->comment); + if (rc != SLURM_SUCCESS) { + error("slurmdbd: DBD_RC is %d from %s(%u): %s", + rc, + slurmdbd_msg_type_2_str(msg->sent_type, + 1), + msg->sent_type, + msg->comment); + if(msg->sent_type == DBD_REGISTER_CTLD && + slurm_get_accounting_storage_enforce()) + fatal("You need to add this cluster " + "to accounting if you want to " + "enforce associations, or no " + "jobs will ever run."); + } slurmdbd_free_rc_msg(msg); } else error("slurmdbd: unpack message error"); @@ -1001,7 +1169,7 @@ unpack_error: return rc; } -static Buf _recv_msg(void) +static Buf _recv_msg(int read_timeout) { uint32_t msg_size, nw_size; char *msg; @@ -1011,7 +1179,7 @@ static Buf _recv_msg(void) if (slurmdbd_fd < 0) return NULL; - if (!_fd_readable(slurmdbd_fd)) + if (!_fd_readable(slurmdbd_fd, read_timeout)) return NULL; msg_read = read(slurmdbd_fd, &nw_size, sizeof(nw_size)); if (msg_read != sizeof(nw_size)) @@ -1027,7 +1195,7 @@ static Buf _recv_msg(void) msg = xmalloc(msg_size); offset = 0; while (msg_size > offset) { - if (!_fd_readable(slurmdbd_fd)) + if (!_fd_readable(slurmdbd_fd, read_timeout)) break; /* problem with this socket */ msg_read = read(slurmdbd_fd, (msg + offset), (msg_size - offset)); @@ -1066,21 +1234,17 @@ static int _tot_wait (struct timeval *start_time) /* Wait until a file is readable, * RET false if can not be read */ -static bool _fd_readable(slurm_fd fd) +static bool _fd_readable(slurm_fd fd, int read_timeout) { struct pollfd ufds; - static int msg_timeout = -1; int rc, time_left; struct timeval tstart; - if (msg_timeout == -1) - msg_timeout = slurm_get_msg_timeout() * 1000; - ufds.fd = fd; ufds.events = POLLIN; gettimeofday(&tstart, NULL); while (agent_shutdown == 0) { - time_left = msg_timeout - _tot_wait(&tstart); + time_left = 
read_timeout - _tot_wait(&tstart); rc = poll(&ufds, 1, time_left); if (rc == -1) { if ((errno == EINTR) || (errno == EAGAIN)) @@ -1121,7 +1285,7 @@ static bool _fd_readable(slurm_fd fd) static int _fd_writeable(slurm_fd fd) { struct pollfd ufds; - int msg_timeout = 5000; + int write_timeout = 5000; int rc, time_left; struct timeval tstart; @@ -1129,7 +1293,7 @@ static int _fd_writeable(slurm_fd fd) ufds.events = POLLOUT; gettimeofday(&tstart, NULL); while (agent_shutdown == 0) { - time_left = msg_timeout - _tot_wait(&tstart); + time_left = write_timeout - _tot_wait(&tstart); rc = poll(&ufds, 1, time_left); if (rc == -1) { if ((errno == EINTR) || (errno == EAGAIN)) @@ -1237,7 +1401,8 @@ static void *_agent(void *x) struct timespec abs_time; static time_t fail_time = 0; int sigarray[] = {SIGUSR1, 0}; - + int read_timeout = SLURMDBD_TIMEOUT * 1000; + /* Prepare to catch SIGUSR1 to interrupt pending * I/O and terminate in a timely fashion. */ xsignal(SIGUSR1, _sig_handler); @@ -1290,7 +1455,7 @@ static void *_agent(void *x) break; error("slurmdbd: Failure sending message"); } else { - rc = _get_return_code(); + rc = _get_return_code(read_timeout); if (rc == EAGAIN) { if (agent_shutdown) break; @@ -2039,6 +2204,12 @@ slurmdbd_unpack_init_msg(dbd_init_msg_t **msg, Buf buffer, char *auth_info) goto unpack_error; } msg_ptr->uid = g_slurm_auth_get_uid(auth_cred, auth_info); + if(g_slurm_auth_errno(auth_cred) != SLURM_SUCCESS) { + error("Bad authentication: %s", + g_slurm_auth_errstr(g_slurm_auth_errno(auth_cred))); + goto unpack_error; + } + g_slurm_auth_destroy(auth_cred); return SLURM_SUCCESS; diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index 8fa02c784..4ddc04264 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -355,7 +355,8 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req); extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer); extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type); -extern 
char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type); +extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type, + int get_enum); /*****************************************************************************\ * Free various SlurmDBD message structures diff --git a/src/common/uid.c b/src/common/uid.c index 299c98406..5418fd837 100644 --- a/src/common/uid.c +++ b/src/common/uid.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/uid.c - uid/gid lookup utility functions - * $Id: uid.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: uid.c 14795 2008-08-15 21:54:22Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -41,48 +41,132 @@ #include <grp.h> #include <ctype.h> -#include "uid.h" +#include "src/common/uid.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" uid_t uid_from_string (char *name) { - struct passwd *pwd = NULL; - char *p = NULL; + struct passwd pwd, *result; + char buffer[PW_BUF_SIZE], *p = NULL; + int rc; uid_t uid = (uid_t) strtoul (name, &p, 10); - if (*p != '\0') - pwd = getpwnam (name); - else - pwd = getpwuid (uid); - - return pwd ? 
pwd->pw_uid : (uid_t) -1; + if (*p != '\0') { + while (1) { + rc = getpwnam_r(name, &pwd, buffer, PW_BUF_SIZE, + &result); + if (rc == EINTR) + continue; + if (rc != 0) + result = NULL; + break; + } + if (result == NULL) + uid = (uid_t) -1; + else + uid = result->pw_uid; + } else { + while (1) { + rc = getpwuid_r(uid, &pwd, buffer, PW_BUF_SIZE, + &result); + if (rc == EINTR) + continue; + if (rc != 0) + result = NULL; + break; + } + if (result == NULL) + uid = (uid_t) -1; + /* else uid is already correct */ + } + return uid; } char * uid_to_string (uid_t uid) { - struct passwd *pwd = NULL; + struct passwd pwd, *result; + char buffer[PW_BUF_SIZE], *ustring; + int rc; /* Suse Linux does not handle multiple users with UID=0 well */ if (uid == 0) - return "root"; + return xstrdup("root"); - pwd = getpwuid(uid); - return pwd ? pwd->pw_name : "nobody"; + while (1) { + rc = getpwuid_r(uid, &pwd, buffer, PW_BUF_SIZE, &result); + if (rc == EINTR) + continue; + if (rc != 0) + result = NULL; + break; + } + if (result) + ustring = xstrdup(result->pw_name); + else + ustring = xstrdup("nobody"); + return ustring; } gid_t gid_from_string (char *name) { - struct group *g = NULL; - char *p = NULL; + struct group grp, *result; + char buffer[PW_BUF_SIZE], *p = NULL; + int rc; gid_t gid = (gid_t) strtoul (name, &p, 10); - if (*p != '\0') - g = getgrnam (name); - else - g = getgrgid (gid); - - return g ? 
g->gr_gid : (gid_t) -1; + if (*p != '\0') { + while (1) { + rc = getgrnam_r(name, &grp, buffer, PW_BUF_SIZE, + &result); + if (rc == EINTR) + continue; + if (rc != 0) + result = NULL; + break; + } + if (result == NULL) + gid = (gid_t) -1; + else + gid = result->gr_gid; + } else { + while (1) { + rc = getgrgid_r(gid, &grp, buffer, PW_BUF_SIZE, + &result); + if (rc == EINTR) + continue; + if (rc != 0) + result = NULL; + break; + } + if (result == NULL) + gid = (gid_t) -1; + /* else gid is already correct */ + } + return gid; } +char * +gid_to_string (gid_t gid) +{ + struct group grp, *result; + char buffer[PW_BUF_SIZE], *gstring; + int rc; + + while (1) { + rc = getgrgid_r(gid, &grp, buffer, PW_BUF_SIZE, &result); + if (rc == EINTR) + continue; + if (rc != 0) + result = NULL; + break; + } + if (result) + gstring = xstrdup(result->gr_name); + else + gstring = xstrdup("nobody"); + return gstring; +} diff --git a/src/common/uid.h b/src/common/uid.h index a7dabb2ca..9ded73457 100644 --- a/src/common/uid.h +++ b/src/common/uid.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/uid.h - uid/gid lookup utility functions - * $Id: uid.h 13672 2008-03-19 23:10:58Z jette $ + * $Id: uid.h 14795 2008-08-15 21:54:22Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -40,6 +40,15 @@ #define __SLURM_UID_UTILITY_H__ #include <sys/types.h> +#include <unistd.h> + +/* + * In an ideal world, we could use sysconf(_SC_GETPW_R_SIZE_MAX) to get the + * maximum buffer size neede for getpwnam_r(), but if there is no maximum + * value configured, the value returned is 1024, which can too small. + * Diito for _SC_GETGR_R_SIZE_MAX. Use 64k byte buffer by default. 
+ */ +#define PW_BUF_SIZE 65536 /* * Return validated uid_t for string in ``name'' which contains @@ -56,8 +65,14 @@ uid_t uid_from_string (char *name); gid_t gid_from_string (char *name); /* - * Translate uid to user name + * Translate uid to user name, + * NOTE: xfree the return value */ char *uid_to_string (uid_t uid); +/* + * Same as uid_to_string, but for group name. + * NOTE: xfree the return value + */ +char *gid_to_string (gid_t gid); #endif /*__SLURM_UID_UTILITY_H__*/ diff --git a/src/database/gold_interface.c b/src/database/gold_interface.c index f39ad68a0..080f4e495 100644 --- a/src/database/gold_interface.c +++ b/src/database/gold_interface.c @@ -319,7 +319,7 @@ extern gold_response_t *get_gold_response(gold_request_t *gold_request) unsigned int dlen = SHA_DIGEST_LENGTH; unsigned char digest[dlen]; unsigned char signature[slen]; - char c; + char c, *user_name; char *object = NULL; char *action = NULL; char *innerds = NULL; @@ -431,10 +431,12 @@ extern gold_response_t *get_gold_response(gold_request_t *gold_request) } list_iterator_destroy(itr); + user_name = uid_to_string(geteuid()); xstrfmtcat(gold_request->body, "<Body><Request action=\"%s\" actor=\"%s\">" "<Object>%s</Object>", - action, uid_to_string(geteuid()), object); + action, user_name, object); + xfree(user_name); if(innerds) { xstrcat(gold_request->body, innerds); xfree(innerds); diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index 22c52c033..fff6191be 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -186,7 +186,12 @@ extern int init ( void ) "Please use a database plugin"); } - if(first) { + /* This check for the slurm user id is a quick and dirty patch + * to see if the controller is calling this, since we open the + * file in append mode sacct could fail on it if the file + * 
isn't world writable. + */ + if(first && (getuid() == slurm_get_slurm_user_id())) { debug2("jobacct_init() called"); log_file = slurm_get_accounting_storage_loc(); if(!log_file) diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index db3ec5d2e..eb17140be 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -749,15 +749,15 @@ static int _parse_line(char *f[], void **data, int len) (*step)->rusage.ru_nsignals = atoi(f[F_NSIGNALS]); (*step)->rusage.ru_nvcsw = atoi(f[F_NVCSW]); (*step)->rusage.ru_nivcsw = atoi(f[F_NIVCSW]); - (*step)->sacct.max_vsize = atoi(f[F_MAX_VSIZE]) * 1024; + (*step)->sacct.max_vsize = atoi(f[F_MAX_VSIZE]); if(len > F_STEPNODES) { (*step)->sacct.max_vsize_id.taskid = atoi(f[F_MAX_VSIZE_TASK]); - (*step)->sacct.ave_vsize = atof(f[F_AVE_VSIZE]) * 1024; - (*step)->sacct.max_rss = atoi(f[F_MAX_RSS]) * 1024; + (*step)->sacct.ave_vsize = atof(f[F_AVE_VSIZE]); + (*step)->sacct.max_rss = atoi(f[F_MAX_RSS]); (*step)->sacct.max_rss_id.taskid = atoi(f[F_MAX_RSS_TASK]); - (*step)->sacct.ave_rss = atof(f[F_AVE_RSS]) * 1024; + (*step)->sacct.ave_rss = atof(f[F_AVE_RSS]); (*step)->sacct.max_pages = atoi(f[F_MAX_PAGES]); (*step)->sacct.max_pages_id.taskid = atoi(f[F_MAX_PAGES_TASK]); @@ -771,13 +771,13 @@ static int _parse_line(char *f[], void **data, int len) } else { (*step)->sacct.max_vsize_id.taskid = (uint16_t)NO_VAL; (*step)->sacct.ave_vsize = (float)NO_VAL; - (*step)->sacct.max_rss = (uint32_t)NO_VAL; + (*step)->sacct.max_rss = NO_VAL; (*step)->sacct.max_rss_id.taskid = (uint16_t)NO_VAL; (*step)->sacct.ave_rss = (float)NO_VAL; - (*step)->sacct.max_pages = (uint32_t)NO_VAL; + (*step)->sacct.max_pages = NO_VAL; (*step)->sacct.max_pages_id.taskid = (uint16_t)NO_VAL; (*step)->sacct.ave_pages = (float)NO_VAL; - (*step)->sacct.min_cpu = (uint32_t)NO_VAL; + 
(*step)->sacct.min_cpu = NO_VAL; (*step)->sacct.min_cpu_id.taskid = (uint16_t)NO_VAL; (*step)->sacct.ave_cpu = (float)NO_VAL; (*step)->stepname = NULL; @@ -793,14 +793,10 @@ static int _parse_line(char *f[], void **data, int len) (*step)->sacct.min_cpu_id.nodeid = atoi(f[F_MIN_CPU_NODE]); } else { - (*step)->sacct.max_vsize_id.nodeid = - (uint32_t)NO_VAL; - (*step)->sacct.max_rss_id.nodeid = - (uint32_t)NO_VAL; - (*step)->sacct.max_pages_id.nodeid = - (uint32_t)NO_VAL; - (*step)->sacct.min_cpu_id.nodeid = - (uint32_t)NO_VAL; + (*step)->sacct.max_vsize_id.nodeid = NO_VAL; + (*step)->sacct.max_rss_id.nodeid = NO_VAL; + (*step)->sacct.max_pages_id.nodeid = NO_VAL; + (*step)->sacct.min_cpu_id.nodeid = NO_VAL; } if(len > F_STEP_ACCOUNT) (*step)->account = xstrdup(f[F_STEP_ACCOUNT]); @@ -1467,6 +1463,17 @@ extern void filetxt_jobacct_process_archive(List selected_parts, } list_iterator_destroy(itr); + /* write records in other_list to new log */ + itr = list_iterator_create(other_list); + while((exp_rec = list_next(itr))) { + if (fputs(exp_rec->line, new_logfile)<0) { + perror("writing keep_logfile"); + list_iterator_destroy(itr); + goto finished2; + } + } + list_iterator_destroy(itr); + if (rename(params->opt_filein, old_logfile_name)) { perror("renaming logfile to .old."); goto finished2; @@ -1503,6 +1510,13 @@ extern void filetxt_jobacct_process_archive(List selected_parts, perror("looking for late-arriving records"); goto finished2; } + + /* reopen new logfile in append mode, since slurmctld may write it */ + if (freopen(params->opt_filein, "a", new_logfile) == NULL) { + perror("reopening new logfile"); + goto finished2; + } + while (fgets(line, BUFFER_SIZE, fd)) { if (fputs(line, new_logfile)<0) { perror("writing final records"); diff --git a/src/plugins/accounting_storage/gold/accounting_storage_gold.c b/src/plugins/accounting_storage/gold/accounting_storage_gold.c index bd9629fd9..2d5a9db44 100644 --- a/src/plugins/accounting_storage/gold/accounting_storage_gold.c 
+++ b/src/plugins/accounting_storage/gold/accounting_storage_gold.c @@ -3204,19 +3204,31 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, /* } */ if(account_rec.user) { - struct passwd *passwd_ptr = - getpwnam(account_rec. - user); + struct passwd pwd, *result; + size_t bufsize; + char *buffer; + int rc; + bufsize = sysconf( + _SC_GETPW_R_SIZE_MAX); + buffer = xmalloc(bufsize); + rc = getpwnam_r(account_rec. + user, + &pwd, buffer, + bufsize, + &result); + if (rc != 0) + result = NULL; job->user = xstrdup(account_rec. user); - if(passwd_ptr) { + if(result) { job->uid = - passwd_ptr-> + result-> pw_uid; job->gid = - passwd_ptr-> + result-> pw_gid; } + xfree(buffer); } if(account_rec.acct) job->account = diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 0e06f7ce4..af96b714e 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -51,6 +51,7 @@ #include "mysql_rollup.h" #include "src/common/slurmdbd_defs.h" #include "src/common/slurm_auth.h" +#include "src/common/uid.h" /* * These variables are required by the generic plugin interface. If they @@ -131,7 +132,9 @@ extern int clusteracct_storage_p_get_usage( mysql_conn_t *mysql_conn, acct_cluster_rec_t *cluster_rec, time_t start, time_t end); - +/* This should be added to the beginning of each function to make sure + * we have a connection to the database before we try to use it. + */ static int _check_connection(mysql_conn_t *mysql_conn) { if(!mysql_conn) { @@ -207,6 +210,9 @@ static int _addto_update_list(List update_list, acct_update_type_t type, return SLURM_SUCCESS; } +/* This should take care of all the lft and rgts when you move an + * account. This handles deleted associations also. 
+ */ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, char *cluster, char *id, char *parent) @@ -236,14 +242,23 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, } par_left = atoi(row[0]); mysql_free_result(result); - - width = (rgt - lft + 1); + diff = ((par_left + 1) - lft); + + if(diff == 0) { + debug3("Trying to move association to the same position? " + "Nothing to do."); + return rc; + } + width = (rgt - lft + 1); + + /* every thing below needs to be a %d not a %u because we are + looking for -1 */ xstrfmtcat(query, "update %s set deleted = deleted + 2, " "lft = lft + %d, rgt = rgt + %d " - "WHERE lft BETWEEN %u AND %u;", + "WHERE lft BETWEEN %d AND %d;", assoc_table, diff, diff, lft, rgt); xstrfmtcat(query, @@ -258,11 +273,11 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, xstrfmtcat(query, "UPDATE %s SET rgt = rgt - %d WHERE " - "(%d < 0 && rgt > %u && deleted < 2) " - "|| (%d >= 0 && rgt > %u);" + "(%d < 0 && rgt > %d && deleted < 2) " + "|| (%d > 0 && rgt > %d);" "UPDATE %s SET lft = lft - %d WHERE " - "(%d < 0 && lft > %u && deleted < 2) " - "|| (%d >= 0 && lft > %u);", + "(%d < 0 && lft > %d && deleted < 2) " + "|| (%d > 0 && lft > %d);", assoc_table, width, diff, rgt, diff, lft, @@ -283,6 +298,11 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, return rc; } + +/* This code will move an account from one parent to another. This + * should work either way in the tree. (i.e. move child to be parent + * of current parent, and parent to be child of child.) 
+ */ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, char *cluster, char *id, char *old_parent, char *new_parent) @@ -295,11 +315,9 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, ListIterator itr = NULL; acct_association_rec_t *assoc = NULL; - /* first we need to see if we are - * going to make a child of this - * account the new parent. If so we - * need to move that child to this - * accounts parent and then do the move + /* first we need to see if we are going to make a child of this + * account the new parent. If so we need to move that child to this + * accounts parent and then do the move. */ query = xstrdup_printf( "select id, lft, rgt from %s where lft between %d and %d " @@ -326,12 +344,33 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, if(rc == SLURM_ERROR) return rc; - /* now move the one we wanted to move in the first place */ - rc = _move_account(mysql_conn, lft, rgt, cluster, id, new_parent); + /* now move the one we wanted to move in the first place + * We need to get the new lft and rgts though since they may + * have changed. + */ + query = xstrdup_printf( + "select lft, rgt from %s where id=%s;", + assoc_table, id); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { + xfree(query); + return SLURM_ERROR; + } + xfree(query); + + if((row = mysql_fetch_row(result))) { + rc = _move_account(mysql_conn, atoi(row[0]), atoi(row[1]), + cluster, id, new_parent); + } else { + error("can't find parent? 
we were able to a second ago."); + rc = SLURM_ERROR; + } + mysql_free_result(result); if(rc == SLURM_ERROR) return rc; - + /* now we need to send the update of the new parents and * limits, so just to be safe, send the whole tree */ @@ -355,6 +394,8 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, return rc; } +/* Let me know if the last statement had rows that were affected. + */ static int _last_affected_rows(MYSQL *mysql_db) { int status=0, rows=0; @@ -377,6 +418,9 @@ static int _last_affected_rows(MYSQL *mysql_db) return rows; } +/* This is called by most modify functions to alter the table and + * insert a new line in the transaction table. + */ static int _modify_common(mysql_conn_t *mysql_conn, uint16_t type, time_t now, @@ -415,6 +459,10 @@ static int _modify_common(mysql_conn_t *mysql_conn, return SLURM_SUCCESS; } +/* Used to get all the users inside a lft and rgt set. This is just + * to send the user all the associations that are being modified from + * a previous change to it's parent. 
+ */ static int _modify_unset_users(mysql_conn_t *mysql_conn, acct_association_rec_t *assoc, char *acct, @@ -464,16 +512,13 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, xstrcat(object, assoc_req_inx[i]); } + /* We want all the sub accounts and user accounts */ query = xstrdup_printf("select distinct %s from %s where deleted=0 " "&& lft between %d and %d && " "((user = '' && parent_acct = '%s') || " "(user != '' && acct = '%s')) " "order by lft;", object, assoc_table, lft, rgt, acct, acct); -/* query = xstrdup_printf("select distinct %s from %s where deleted=0 " */ -/* "&& lft between %d and %d and user != ''" */ -/* "order by lft;", */ -/* object, assoc_table, lft, rgt); */ xfree(object); debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = @@ -521,9 +566,15 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, } else mod_assoc->max_cpu_secs_per_job = NO_VAL; - + /* We only want to add those that are modified here */ if(modified) { + /* Since we aren't really changing this non + * user association we don't want to send it. + */ if(!row[ASSOC_USER][0]) { + /* This is a sub account so run it + * through as if it is a parent. + */ _modify_unset_users(mysql_conn, mod_assoc, row[ASSOC_ACCT], @@ -533,7 +584,7 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, destroy_acct_association_rec(mod_assoc); continue; } - + /* We do want to send all user accounts though */ mod_assoc->fairshare = NO_VAL; if(row[ASSOC_PART][0]) { // see if there is a partition name @@ -600,6 +651,7 @@ static bool _check_jobs_before_remove(mysql_conn_t *mysql_conn, return rc; } +/* Same as above but for associations instead of other tables */ static bool _check_jobs_before_remove_assoc(mysql_conn_t *mysql_conn, char *assoc_char) { @@ -963,6 +1015,9 @@ just_update: return rc; } +/* Fill in all the users that are coordinator for this account. This + * will fill in if there are coordinators from a parent account also. 
+ */ static int _get_account_coords(mysql_conn_t *mysql_conn, acct_account_rec_t *acct) { @@ -1019,6 +1074,9 @@ static int _get_account_coords(mysql_conn_t *mysql_conn, return SLURM_SUCCESS; } +/* Fill in all the accounts this user is coordinator over. This + * will fill in all the sub accounts they are coordinator over also. + */ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) { char *query = NULL; @@ -1099,6 +1157,9 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) return SLURM_SUCCESS; } +/* Used in job functions for getting the database index based off the + * submit time, job and assoc id. + */ static int _get_db_index(MYSQL *db_conn, time_t submit, uint32_t jobid, uint32_t associd) { @@ -1141,6 +1202,7 @@ static mysql_db_info_t *_mysql_acct_create_db_info() return db_info; } +/* Any time a new table is added set it up here */ static int _mysql_acct_check_tables(MYSQL *db_conn) { int rc = SLURM_SUCCESS; @@ -1289,11 +1351,11 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) { "user_usec", "int unsigned default 0 not null" }, { "sys_sec", "int unsigned default 0 not null" }, { "sys_usec", "int unsigned default 0 not null" }, - { "max_vsize", "mediumint unsigned default 0 not null" }, + { "max_vsize", "int unsigned default 0 not null" }, { "max_vsize_task", "smallint unsigned default 0 not null" }, { "max_vsize_node", "mediumint unsigned default 0 not null" }, { "ave_vsize", "float default 0.0 not null" }, - { "max_rss", "mediumint unsigned default 0 not null" }, + { "max_rss", "int unsigned default 0 not null" }, { "max_rss_task", "smallint unsigned default 0 not null" }, { "max_rss_node", "mediumint unsigned default 0 not null" }, { "ave_rss", "float default 0.0 not null" }, @@ -1380,7 +1442,9 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) "UNTIL (@mj != -1 && @mnpj != -1 && @mwpj != -1 " "&& @mcpj != -1) || @my_acct = '' END REPEAT; " "END;"; - + char *query = NULL; + time_t now = 
time(NULL); + if(mysql_db_create_table(db_conn, acct_coord_table, acct_coord_table_fields, ", primary key (acct(20), user(20)))") @@ -1465,8 +1529,7 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) == SLURM_ERROR) return SLURM_ERROR; else { - time_t now = time(NULL); - char *query = xstrdup_printf( + query = xstrdup_printf( "insert into %s " "(creation_time, mod_time, name, description) " "values (%d, %d, 'normal', 'Normal QOS default') " @@ -1496,6 +1559,28 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) rc = mysql_db_query(db_conn, get_parent_proc); + /* Add user root to be a user by default and have this default + * account be root. If already there just update + * name='root'. That way if the admins delete it it will + * remained deleted. Creation time will be 0 so it will never + * really be deleted. + */ + query = xstrdup_printf( + "insert into %s (creation_time, mod_time, name, default_acct, " + "admin_level) values (0, %d, 'root', 'root', %u) " + "on duplicate key update name='root';", + user_table, now, ACCT_ADMIN_SUPER_USER, now); + xstrfmtcat(query, + "insert into %s (creation_time, mod_time, name, " + "description, organization) values (0, %d, 'root', " + "'default root account', 'root') on duplicate key " + "update name='root';", + acct_table, now); + + debug3("%s", query); + mysql_db_query(db_conn, query); + xfree(query); + return rc; } #endif @@ -1752,9 +1837,8 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, int rc = SLURM_SUCCESS; acct_user_rec_t *object = NULL; char *cols = NULL, *vals = NULL, *query = NULL, *txn_query = NULL; - struct passwd *pw = NULL; time_t now = time(NULL); - char *user = NULL; + char *user_name = NULL; char *extra = NULL; int affect_rows = 0; List assoc_list = list_create(destroy_acct_association_rec); @@ -1762,10 +1846,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; - 
if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(user_list); while((object = list_next(itr))) { if(!object->name || !object->default_acct) { @@ -1806,6 +1887,8 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, if(object->admin_level != ACCT_ADMIN_NOTSET) { xstrcat(cols, ", admin_level"); xstrfmtcat(vals, ", %u", object->admin_level); + xstrfmtcat(extra, ", admin_level=%u", + object->admin_level); } query = xstrdup_printf( @@ -1840,7 +1923,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(txn_query, ", (%d, %u, '%s', '%s', \"%s\")", now, DBD_ADD_USERS, object->name, - user, extra); + user_name, extra); else xstrfmtcat(txn_query, "insert into %s " @@ -1848,7 +1931,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, "values (%d, %u, '%s', '%s', \"%s\")", txn_table, now, DBD_ADD_USERS, object->name, - user, extra); + user_name, extra); xfree(extra); if(!object->assoc_list) @@ -1857,6 +1940,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, list_transfer(assoc_list, object->assoc_list); } list_iterator_destroy(itr); + xfree(user_name); if(rc != SLURM_ERROR) { if(txn_query) { @@ -1893,7 +1977,6 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, #ifdef HAVE_MYSQL char *query = NULL, *user = NULL, *acct = NULL; char *user_name = NULL, *txn_query = NULL; - struct passwd *pw = NULL; ListIterator itr, itr2; time_t now = time(NULL); int rc = SLURM_SUCCESS; @@ -1910,10 +1993,7 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(user_cond->assoc_cond->user_list); itr2 = list_iterator_create(acct_list); while((user = 
list_next(itr))) { @@ -1946,10 +2026,10 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, } list_iterator_reset(itr2); } + xfree(user_name); list_iterator_destroy(itr); list_iterator_destroy(itr2); - if(query) { xstrfmtcat(query, " on duplicate key update mod_time=%d, deleted=0;%s", @@ -1990,9 +2070,8 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, int rc = SLURM_SUCCESS; acct_account_rec_t *object = NULL; char *cols = NULL, *vals = NULL, *query = NULL, *txn_query = NULL; - struct passwd *pw = NULL; time_t now = time(NULL); - char *user = NULL; + char *user_name = NULL; char *extra = NULL; int affect_rows = 0; List assoc_list = list_create(destroy_acct_association_rec); @@ -2000,10 +2079,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; - if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(acct_list); while((object = list_next(itr))) { if(!object->name || !object->description @@ -2066,7 +2142,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(txn_query, ", (%d, %u, '%s', '%s', \"%s\")", now, DBD_ADD_ACCOUNTS, object->name, - user, extra); + user_name, extra); else xstrfmtcat(txn_query, "insert into %s " @@ -2074,7 +2150,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, "values (%d, %u, '%s', '%s', \"%s\")", txn_table, now, DBD_ADD_ACCOUNTS, object->name, - user, extra); + user_name, extra); xfree(extra); if(!object->assoc_list) @@ -2083,6 +2159,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, list_transfer(assoc_list, object->assoc_list); } list_iterator_destroy(itr); + xfree(user_name); if(rc != SLURM_ERROR) { if(txn_query) { @@ -2122,18 +2199,28 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, 
acct_cluster_rec_t *object = NULL; char *cols = NULL, *vals = NULL, *extra = NULL, *query = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; - char *user = NULL; + char *user_name = NULL; int affect_rows = 0; int added = 0; + List assoc_list = NULL; + acct_association_rec_t *assoc = NULL; if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; - if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - + assoc_list = list_create(destroy_acct_association_rec); + assoc = xmalloc(sizeof(acct_association_rec_t)); + list_append(assoc_list, assoc); + + assoc->user = xstrdup("root"); + assoc->acct = xstrdup("root"); + assoc->fairshare = NO_VAL; + assoc->max_cpu_secs_per_job = NO_VAL; + assoc->max_jobs = NO_VAL; + assoc->max_nodes_per_job = NO_VAL; + assoc->max_wall_duration_per_job = NO_VAL; + + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(cluster_list); while((object = list_next(itr))) { if(!object->name) { @@ -2268,8 +2355,8 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, "insert into %s " "(timestamp, action, name, actor, info) " "values (%d, %u, '%s', '%s', \"%s\");", - txn_table, - now, DBD_ADD_CLUSTERS, object->name, user, extra); + txn_table, now, DBD_ADD_CLUSTERS, + object->name, user_name, extra); xfree(extra); debug4("query\n%s",query); rc = mysql_db_query(mysql_conn->db_conn, query); @@ -2278,8 +2365,23 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, error("Couldn't add txn"); } else added++; + + /* Add user root by default to run from the root + * association + */ + xfree(assoc->cluster); + assoc->cluster = xstrdup(object->name); + if(acct_storage_p_add_associations(mysql_conn, uid, assoc_list) + == SLURM_ERROR) { + error("Problem adding root user association"); + rc = SLURM_ERROR; + } + } list_iterator_destroy(itr); + xfree(user_name); + + list_destroy(assoc_list); if(!added) { if(mysql_conn->rollback) { @@ -2307,8 +2409,7 @@ extern int 
acct_storage_p_add_associations(mysql_conn_t *mysql_conn, *extra = NULL, *query = NULL, *update = NULL; char *parent = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; - char *user = NULL; + char *user_name = NULL; char *tmp_char = NULL; int assoc_id = 0; int incr = 0, my_left = 0; @@ -2341,10 +2442,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; - if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(association_list); while((object = list_next(itr))) { if(!object->cluster || !object->acct) { @@ -2374,23 +2472,21 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, xstrfmtcat(vals, ", '%s'", parent); xstrfmtcat(extra, ", parent_acct='%s'", parent); xstrfmtcat(update, " && user=''"); - } - - if(object->user) { + } else { + char *part = object->partition; xstrcat(cols, ", user"); xstrfmtcat(vals, ", '%s'", object->user); - xstrfmtcat(extra, ", user='%s'", object->user); xstrfmtcat(update, " && user='%s'", object->user); - - if(object->partition) { - xstrcat(cols, ", partition"); - xstrfmtcat(vals, ", '%s'", object->partition); - xstrfmtcat(extra, ", partition='%s'", - object->partition); - xstrfmtcat(update, " && partition='%s'", - object->partition); - } + + /* We need to give a partition wiether it be + * '' or the actual partition name given + */ + if(!part) + part = ""; + xstrcat(cols, ", partition"); + xstrfmtcat(vals, ", '%s'", part); + xstrfmtcat(update, " && partition='%s'", part); } if((int)object->fairshare >= 0) { @@ -2647,17 +2743,21 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, if(txn_query) xstrfmtcat(txn_query, ", (%d, %d, '%d', '%s', \"%s\")", - now, DBD_ADD_ASSOCS, assoc_id, user, extra); + now, DBD_ADD_ASSOCS, assoc_id, user_name, + extra); else xstrfmtcat(txn_query, "insert into %s " "(timestamp, action, name, actor, info) " 
"values (%d, %d, '%d', '%s', \"%s\")", txn_table, - now, DBD_ADD_ASSOCS, assoc_id, user, extra); + now, DBD_ADD_ASSOCS, assoc_id, user_name, + extra); xfree(extra); } list_iterator_destroy(itr); + xfree(user_name); + if(rc != SLURM_SUCCESS) goto end_it; @@ -2722,18 +2822,14 @@ extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, acct_qos_rec_t *object = NULL; char *query = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; - char *user = NULL; + char *user_name = NULL; int affect_rows = 0; int added = 0; if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; - if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - + user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(qos_list); while((object = list_next(itr))) { if(!object->name) { @@ -2771,7 +2867,7 @@ extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, "(timestamp, action, name, actor, info) " "values (%d, %u, '%s', '%s', \"%s\");", txn_table, - now, DBD_ADD_QOS, object->name, user, + now, DBD_ADD_QOS, object->name, user_name, object->description); debug4("query\n%s",query); @@ -2789,6 +2885,7 @@ extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, } list_iterator_destroy(itr); + xfree(user_name); if(!added) { if(mysql_conn->rollback) { @@ -2814,7 +2911,6 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, char *object = NULL; char *vals = NULL, *extra = NULL, *query = NULL, *name_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0; MYSQL_RES *result = NULL; @@ -2829,10 +2925,6 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - xstrcat(extra, "where deleted=0"); if(user_cond->assoc_cond && user_cond->assoc_cond->user_list && list_count(user_cond->assoc_cond->user_list)) { @@ 
-3015,16 +3107,17 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, xfree(query); xstrcat(name_char, ")"); - if(_modify_common(mysql_conn, DBD_MODIFY_USERS, now, - user_name, user_table, name_char, vals) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _modify_common(mysql_conn, DBD_MODIFY_USERS, now, + user_name, user_table, name_char, vals); + xfree(user_name); + xfree(name_char); + xfree(vals); + if (rc == SLURM_ERROR) { error("Couldn't modify users"); list_destroy(ret_list); ret_list = NULL; } - - xfree(name_char); - xfree(vals); return ret_list; #else @@ -3044,8 +3137,7 @@ extern List acct_storage_p_modify_accounts( char *object = NULL; char *vals = NULL, *extra = NULL, *query = NULL, *name_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; - char *user = NULL; + char *user_name = NULL; int set = 0; MYSQL_RES *result = NULL; MYSQL_ROW row; @@ -3058,10 +3150,6 @@ extern List acct_storage_p_modify_accounts( if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; - if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - xstrcat(extra, "where deleted=0"); if(acct_cond->assoc_cond && acct_cond->assoc_cond->acct_list @@ -3200,9 +3288,11 @@ extern List acct_storage_p_modify_accounts( xfree(query); xstrcat(name_char, ")"); - if(_modify_common(mysql_conn, DBD_MODIFY_ACCOUNTS, now, - user, acct_table, name_char, vals) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _modify_common(mysql_conn, DBD_MODIFY_ACCOUNTS, now, + user_name, acct_table, name_char, vals); + xfree(user_name); + if (rc == SLURM_ERROR) { error("Couldn't modify accounts"); list_destroy(ret_list); errno = SLURM_ERROR; @@ -3231,8 +3321,7 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, char *vals = NULL, *extra = NULL, *query = NULL, *name_char = NULL, *assoc_char= NULL, *send_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; - char *user = NULL; + char *user_name = NULL; 
int set = 0; MYSQL_RES *result = NULL; MYSQL_ROW row; @@ -3250,10 +3339,6 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; - if((pw=getpwuid(uid))) { - user = pw->pw_name; - } - xstrcat(extra, "where deleted=0"); if(cluster_cond->cluster_list && list_count(cluster_cond->cluster_list)) { @@ -3320,9 +3405,11 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, if(vals) { send_char = xstrdup_printf("(%s)", name_char); - if(_modify_common(mysql_conn, DBD_MODIFY_CLUSTERS, now, - user, cluster_table, send_char, vals) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _modify_common(mysql_conn, DBD_MODIFY_CLUSTERS, now, + user_name, cluster_table, send_char, vals); + xfree(user_name); + if (rc == SLURM_ERROR) { error("Couldn't modify cluster 1"); list_destroy(ret_list); ret_list = NULL; @@ -3354,7 +3441,6 @@ extern List acct_storage_p_modify_associations( char *object = NULL; char *vals = NULL, *extra = NULL, *query = NULL, *name_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0, i = 0, is_admin=0; MYSQL_RES *result = NULL; @@ -3406,7 +3492,7 @@ extern List acct_storage_p_modify_associations( * set if they are an operator or greater and then * check it below after the query. 
*/ - if(uid == slurmdbd_conf->slurm_user_id + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) || assoc_mgr_get_admin_level(mysql_conn, uid) >= ACCT_ADMIN_OPERATOR) is_admin = 1; @@ -3431,10 +3517,6 @@ extern List acct_storage_p_modify_associations( is_admin = 1; } - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); @@ -3475,9 +3557,27 @@ extern List acct_storage_p_modify_associations( } list_iterator_destroy(itr); xstrcat(extra, ")"); - } else { - debug4("no user specified"); + } else if (!assoc_cond->user_list) { + debug4("no user specified looking at accounts"); xstrcat(extra, " && user = '' "); + } else { + debug4("no user specified looking at users"); + xstrcat(extra, " && user != '' "); + } + + if(assoc_cond->partition_list + && list_count(assoc_cond->partition_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(assoc_cond->partition_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "partition='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { @@ -3735,9 +3835,11 @@ extern List acct_storage_p_modify_associations( xstrcat(name_char, ")"); if(vals) { - if(_modify_common(mysql_conn, DBD_MODIFY_ASSOCS, now, - user_name, assoc_table, name_char, vals) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _modify_common(mysql_conn, DBD_MODIFY_ASSOCS, now, + user_name, assoc_table, name_char, vals); + xfree(user_name); + if (rc == SLURM_ERROR) { if(mysql_conn->rollback) { mysql_db_rollback(mysql_conn->db_conn); } @@ -3770,7 +3872,6 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, char *extra = NULL, *query = NULL, *name_char = NULL, *assoc_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0; 
MYSQL_RES *result = NULL; @@ -3784,10 +3885,6 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - xstrcat(extra, "where deleted=0"); if(user_cond->assoc_cond && user_cond->assoc_cond->user_list @@ -3883,15 +3980,16 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, } xfree(query); - if(_remove_common(mysql_conn, DBD_REMOVE_USERS, now, - user_name, user_table, name_char, assoc_char) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _remove_common(mysql_conn, DBD_REMOVE_USERS, now, + user_name, user_table, name_char, assoc_char); + xfree(user_name); + xfree(name_char); + if (rc == SLURM_ERROR) { list_destroy(ret_list); - xfree(name_char); xfree(assoc_char); return NULL; } - xfree(name_char); query = xstrdup_printf( "update %s as t2 set deleted=1, mod_time=%d where %s", @@ -3920,9 +4018,8 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, #ifdef HAVE_MYSQL char *query = NULL, *object = NULL, *extra = NULL, *last_user = NULL; char *user_name = NULL; - struct passwd *pw = NULL; time_t now = time(NULL); - int set = 0, is_admin=0; + int set = 0, is_admin=0, rc; ListIterator itr = NULL; acct_user_rec_t *user_rec = NULL; List ret_list = NULL; @@ -3953,7 +4050,7 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, * set if they are an operator or greater and then * check it below after the query. 
*/ - if(uid == slurmdbd_conf->slurm_user_id + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) || assoc_mgr_get_admin_level(mysql_conn, uid) >= ACCT_ADMIN_OPERATOR) is_admin = 1; @@ -3978,10 +4075,6 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, is_admin = 1; } - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - /* Leave it this way since we are using extra below */ if(user_cond->assoc_cond && user_cond->assoc_cond->user_list @@ -4083,16 +4176,18 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, } mysql_free_result(result); - if(_remove_common(mysql_conn, DBD_REMOVE_ACCOUNT_COORDS, now, - user_name, acct_coord_table, extra, NULL) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _remove_common(mysql_conn, DBD_REMOVE_ACCOUNT_COORDS, now, + user_name, acct_coord_table, extra, NULL); + xfree(user_name); + xfree(extra); + if (rc == SLURM_ERROR) { list_destroy(ret_list); list_destroy(user_list); - xfree(extra); errno = SLURM_ERROR; return NULL; } - xfree(extra); + /* get the update list set */ itr = list_iterator_create(user_list); while((last_user = list_next(itr))) { @@ -4122,7 +4217,6 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, char *extra = NULL, *query = NULL, *name_char = NULL, *assoc_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0; MYSQL_RES *result = NULL; @@ -4133,10 +4227,6 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, return NULL; } - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; @@ -4240,16 +4330,16 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, } xfree(query); - if(_remove_common(mysql_conn, DBD_REMOVE_ACCOUNTS, now, - user_name, acct_table, name_char, assoc_char) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) 
uid); + rc = _remove_common(mysql_conn, DBD_REMOVE_ACCOUNTS, now, + user_name, acct_table, name_char, assoc_char); + xfree(user_name); + xfree(name_char); + xfree(assoc_char); + if (rc == SLURM_ERROR) { list_destroy(ret_list); - xfree(name_char); - xfree(assoc_char); return NULL; } - xfree(name_char); - xfree(assoc_char); return ret_list; #else @@ -4269,7 +4359,6 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, char *extra = NULL, *query = NULL, *name_char = NULL, *assoc_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0; MYSQL_RES *result = NULL; @@ -4284,9 +4373,6 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } xstrcat(extra, "where deleted=0"); if(cluster_cond->cluster_list && list_count(cluster_cond->cluster_list)) { @@ -4373,16 +4459,16 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, assoc_char = xstrdup_printf("t2.acct='root' && (%s)", extra); xfree(extra); - if(_remove_common(mysql_conn, DBD_REMOVE_CLUSTERS, now, - user_name, cluster_table, name_char, assoc_char) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _remove_common(mysql_conn, DBD_REMOVE_CLUSTERS, now, + user_name, cluster_table, name_char, assoc_char); + xfree(user_name); + xfree(name_char); + xfree(assoc_char); + if (rc == SLURM_ERROR) { list_destroy(ret_list); - xfree(name_char); - xfree(assoc_char); return NULL; } - xfree(name_char); - xfree(assoc_char); return ret_list; #else @@ -4402,7 +4488,6 @@ extern List acct_storage_p_remove_associations( char *extra = NULL, *query = NULL, *name_char = NULL, *assoc_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0, i = 0, is_admin=0; MYSQL_RES *result = NULL; @@ -4452,7 +4537,7 @@ extern List acct_storage_p_remove_associations( * set if they 
are an operator or greater and then * check it below after the query. */ - if(uid == slurmdbd_conf->slurm_user_id + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) || assoc_mgr_get_admin_level(mysql_conn, uid) >= ACCT_ADMIN_OPERATOR) is_admin = 1; @@ -4479,10 +4564,6 @@ extern List acct_storage_p_remove_associations( xstrcat(extra, "where id>0 && deleted=0"); - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); @@ -4525,6 +4606,21 @@ extern List acct_storage_p_remove_associations( xstrcat(extra, ")"); } + if(assoc_cond->partition_list + && list_count(assoc_cond->partition_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(assoc_cond->partition_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "partition='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); @@ -4672,15 +4768,14 @@ extern List acct_storage_p_remove_associations( } mysql_free_result(result); - if(_remove_common(mysql_conn, DBD_REMOVE_ASSOCS, now, - user_name, assoc_table, name_char, assoc_char) - == SLURM_ERROR) { - xfree(name_char); - xfree(assoc_char); - goto end_it; - } + user_name = uid_to_string((uid_t) uid); + rc = _remove_common(mysql_conn, DBD_REMOVE_ASSOCS, now, + user_name, assoc_table, name_char, assoc_char); + xfree(user_name); xfree(name_char); xfree(assoc_char); + if (rc == SLURM_ERROR) + goto end_it; return ret_list; end_it: @@ -4712,7 +4807,6 @@ extern List acct_storage_p_remove_qos(mysql_conn_t *mysql_conn, uint32_t uid, char *extra = NULL, *query = NULL, *name_char = NULL, *assoc_char = NULL; time_t now = time(NULL); - struct passwd *pw = NULL; char *user_name = NULL; int set = 0; MYSQL_RES *result = NULL; @@ -4723,10 +4817,6 @@ extern List 
acct_storage_p_remove_qos(mysql_conn_t *mysql_conn, uint32_t uid, return NULL; } - if((pw=getpwuid(uid))) { - user_name = pw->pw_name; - } - if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; @@ -4821,14 +4911,15 @@ extern List acct_storage_p_remove_qos(mysql_conn_t *mysql_conn, uint32_t uid, } xfree(query); - if(_remove_common(mysql_conn, DBD_REMOVE_ACCOUNTS, now, - user_name, qos_table, name_char, assoc_char) - == SLURM_ERROR) { + user_name = uid_to_string((uid_t) uid); + rc = _remove_common(mysql_conn, DBD_REMOVE_ACCOUNTS, now, + user_name, qos_table, name_char, assoc_char); + xfree(name_char); + xfree(user_name); + if (rc == SLURM_ERROR) { list_destroy(ret_list); - xfree(name_char); return NULL; } - xfree(name_char); return ret_list; #else @@ -4956,7 +5047,7 @@ empty: while((row = mysql_fetch_row(result))) { acct_user_rec_t *user = xmalloc(sizeof(acct_user_rec_t)); -/* struct passwd *passwd_ptr = NULL; */ +/* uid_t pw_uid; */ list_append(user_list, user); user->name = xstrdup(row[USER_REQ_NAME]); @@ -4972,11 +5063,12 @@ empty: * different machine where this user may not exist or * may have a different uid */ -/* passwd_ptr = getpwnam(user->name); */ -/* if(passwd_ptr) */ -/* user->uid = passwd_ptr->pw_uid; */ -/* else */ +/* pw_uid = uid_from_string(user->name); */ +/* if(pw_uid == (uid_t) -1) */ /* user->uid = (uint32_t)NO_VAL; */ +/* else */ +/* user->uid = passwd_ptr->pw_uid; */ + if(user_cond && user_cond->with_coords) { _get_user_coords(mysql_conn, user); } @@ -5601,7 +5693,8 @@ empty: assoc_table, row[ASSOC_REQ_ACCT], row[ASSOC_REQ_CLUSTER], without_parent_limits); - + debug4("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result2 = mysql_db_query_ret( mysql_conn->db_conn, query, 1))) { xfree(query); @@ -5621,7 +5714,7 @@ empty: parent_mnpj = atoi(row2[ASSOC2_REQ_MNPJ]); else - parent_mwpj = INFINITE; + parent_mnpj = INFINITE; if(row2[ASSOC2_REQ_MWPJ]) parent_mwpj = @@ -6167,13 +6260,21 @@ extern int 
acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, last_month = atoi(row[UPDATE_MONTH]); mysql_free_result(result); } else { + time_t now = time(NULL); + /* If we don't have any events like adding a + * cluster this will not work correctly, so we + * will insert now as a starting point. + */ query = xstrdup_printf( + "set @PS = %d;" "select @PS := period_start from %s limit 1;" "insert into %s " "(hourly_rollup, daily_rollup, monthly_rollup) " "values (@PS, @PS, @PS);", - event_table, last_ran_table); + now, event_table, last_ran_table); + debug3("%d(%d) query\n%s", mysql_conn->conn, + __LINE__, query); mysql_free_result(result); if(!(result = mysql_db_query_ret( mysql_conn->db_conn, query, 0))) { @@ -6328,7 +6429,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, } if(query) { - debug3("%s", query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } @@ -6373,10 +6474,19 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn, "update %s set period_end=%d where cluster='%s' " "and period_end=0 and node_name='%s';", event_table, event_time, cluster, node_ptr->name); + /* If you are clean-restarting the controller over and over again you + * could get records that are duplicates in the database. If + * this is the case we will zero out the period_end we are + * just filled in. This will cause the last time to be erased + * from the last restart, but if you are restarting things + * this often the pervious one didn't mean anything anyway. + * This way we only get one for the last time we let it run. 
+ */ xstrfmtcat(query, "insert into %s " "(node_name, cluster, cpu_count, period_start, reason) " - "values ('%s', '%s', %u, %d, '%s');", + "values ('%s', '%s', %u, %d, '%s') on duplicate key " + "update period_end=0;", event_table, node_ptr->name, cluster, cpus, event_time, my_reason); rc = mysql_db_query(mysql_conn->db_conn, query); @@ -6794,7 +6904,16 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, job_ptr->job_id, job_ptr->assoc_id); if(job_ptr->db_index == (uint32_t)-1) { - + /* If we get an error with this just fall + * through to avoid an infinite loop + */ + if(jobacct_storage_p_job_start(mysql_conn, job_ptr) + == SLURM_ERROR) { + error("couldn't add job %u at job completion", + job_ptr->job_id); + return SLURM_SUCCESS; + } + jobacct_storage_p_job_start(mysql_conn, job_ptr); } } @@ -6880,8 +6999,18 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn, step_ptr->job_ptr->details->submit_time, step_ptr->job_ptr->job_id, step_ptr->job_ptr->assoc_id); - if(step_ptr->job_ptr->db_index == (uint32_t)-1) - return SLURM_ERROR; + if(step_ptr->job_ptr->db_index == (uint32_t)-1) { + /* If we get an error with this just fall + * through to avoid an infinite loop + */ + if(jobacct_storage_p_job_start(mysql_conn, + step_ptr->job_ptr) + == SLURM_ERROR) { + error("couldn't add job %u at step start", + step_ptr->job_ptr->job_id); + return SLURM_SUCCESS; + } + } } /* we want to print a -1 for the requid so leave it a %d */ @@ -6987,8 +7116,19 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, step_ptr->job_ptr->details->submit_time, step_ptr->job_ptr->job_id, step_ptr->job_ptr->assoc_id); - if(step_ptr->job_ptr->db_index == -1) - return SLURM_ERROR; + if(step_ptr->job_ptr->db_index == (uint32_t)-1) { + /* If we get an error with this just fall + * through to avoid an infinite loop + */ + if(jobacct_storage_p_job_start(mysql_conn, + step_ptr->job_ptr) + == SLURM_ERROR) { + error("couldn't add job %u " + "at step 
completion", + step_ptr->job_ptr->job_id); + return SLURM_SUCCESS; + } + } } query = xstrdup_printf( @@ -7061,8 +7201,17 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, job_ptr->details->submit_time, job_ptr->job_id, job_ptr->assoc_id); - if(job_ptr->db_index == -1) - return SLURM_ERROR; + if(job_ptr->db_index == (uint32_t)-1) { + /* If we get an error with this just fall + * through to avoid an infinite loop + */ + if(jobacct_storage_p_job_start(mysql_conn, job_ptr) + == SLURM_ERROR) { + error("couldn't suspend job %u", + job_ptr->job_id); + return SLURM_SUCCESS; + } + } } if (job_ptr->job_state == JOB_SUSPENDED) diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c index 094af22df..0b0c44edd 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c @@ -503,7 +503,7 @@ no_cond: job->exitcode = atoi(row[JOB_REQ_COMP_CODE]); job->partition = xstrdup(row[JOB_REQ_PARTITION]); job->nodes = xstrdup(row[JOB_REQ_NODELIST]); - if (!strcmp(job->nodes, "(null)")) { + if (!job->nodes || !strcmp(job->nodes, "(null)")) { xfree(job->nodes); job->nodes = xstrdup("(unknown)"); } diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index cda102757..f0c8556f2 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -56,6 +56,7 @@ #include "src/common/read_config.h" #include "src/common/slurm_accounting_storage.h" #include "src/common/slurmdbd_defs.h" +#include "src/common/uid.h" #include "src/common/xstring.h" #include "src/slurmctld/slurmctld.h" @@ -1355,7 +1356,6 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, dbd_list_msg_t *got_msg; int rc; List job_list = NULL; - struct passwd *pw = 
NULL; get_msg.selected_steps = selected_steps; get_msg.selected_parts = selected_parts; @@ -1367,14 +1367,15 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, } get_msg.gid = params->opt_gid; - if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) - get_msg.user = pw->pw_name; + if (params->opt_uid >=0) + get_msg.user = uid_to_string((uid_t) params->opt_uid); else get_msg.user = NULL; req.msg_type = DBD_GET_JOBS; req.data = &get_msg; rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + xfree(get_msg.user); if (rc != SLURM_SUCCESS) error("slurmdbd: DBD_GET_JOBS failure: %m"); diff --git a/src/plugins/auth/munge/auth_munge.c b/src/plugins/auth/munge/auth_munge.c index 403292765..aa01a42dd 100644 --- a/src/plugins/auth/munge/auth_munge.c +++ b/src/plugins/auth/munge/auth_munge.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * auth_munge.c - SLURM auth implementation via Chris Dunlap's Munge - * $Id: auth_munge.c 14360 2008-06-26 16:17:40Z da $ + * $Id: auth_munge.c 14770 2008-08-14 18:24:35Z da $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -512,7 +512,6 @@ _decode_cred(slurm_auth_credential_t *c, char *socket) done: munge_ctx_destroy(ctx); - return e ? 
SLURM_ERROR : SLURM_SUCCESS; } diff --git a/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c b/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c index a4e67243b..77b558162 100644 --- a/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c +++ b/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c @@ -53,6 +53,7 @@ #include "src/common/slurm_protocol_defs.h" #include "src/common/slurm_jobcomp.h" #include "src/common/parse_time.h" +#include "src/common/uid.h" #include "filetxt_jobcomp_process.h" /* @@ -115,21 +116,15 @@ static void _get_user_name(uint32_t user_id, char *user_name, int buf_size) { static uint32_t cache_uid = 0; - static char cache_name[32] = "root"; - struct passwd * user_info = NULL; - - if (user_id == cache_uid) - snprintf(user_name, buf_size, "%s", cache_name); - else { - user_info = getpwuid((uid_t) user_id); - if (user_info && user_info->pw_name[0]) - snprintf(cache_name, sizeof(cache_name), "%s", - user_info->pw_name); - else - snprintf(cache_name, sizeof(cache_name), "Unknown"); + static char cache_name[32] = "root", *uname; + + if (user_id != cache_uid) { + uname = uid_to_string((uid_t) user_id); + snprintf(cache_name, sizeof(cache_name), "%s", uname); + xfree(uname); cache_uid = user_id; - snprintf(user_name, buf_size, "%s", cache_name); } + snprintf(user_name, buf_size, "%s", cache_name); } /* get the group name for the give group_id */ @@ -137,21 +132,15 @@ static void _get_group_name(uint32_t group_id, char *group_name, int buf_size) { static uint32_t cache_gid = 0; - static char cache_name[32] = "root"; - struct group *group_info = NULL; - - if (group_id == cache_gid) - snprintf(group_name, buf_size, "%s", cache_name); - else { - group_info = getgrgid((gid_t) group_id); - if (group_info && group_info->gr_name[0]) - snprintf(cache_name, sizeof(cache_name), "%s", - group_info->gr_name); - else - snprintf(cache_name, sizeof(cache_name), "Unknown"); + static char cache_name[32] = "root", *gname; + + if (group_id != cache_gid) { + gname = gid_to_string((gid_t) 
group_id); + snprintf(cache_name, sizeof(cache_name), "%s", gname); + xfree(gname); cache_gid = group_id; - snprintf(group_name, buf_size, "%s", cache_name); } + snprintf(group_name, buf_size, "%s", cache_name); } /* diff --git a/src/plugins/jobcomp/mysql/jobcomp_mysql.c b/src/plugins/jobcomp/mysql/jobcomp_mysql.c index 69f24292c..98fa01f33 100644 --- a/src/plugins/jobcomp/mysql/jobcomp_mysql.c +++ b/src/plugins/jobcomp/mysql/jobcomp_mysql.c @@ -42,6 +42,7 @@ #include <sys/types.h> #include "src/common/parse_time.h" #include "src/common/node_select.h" +#include "src/common/uid.h" /* * These variables are required by the generic plugin interface. If they @@ -155,18 +156,14 @@ static int _mysql_jobcomp_check_tables() static char *_get_user_name(uint32_t user_id) { static uint32_t cache_uid = 0; - static char cache_name[32] = "root"; - struct passwd * user_info = NULL; + static char cache_name[32] = "root", *uname; char *ret_name = NULL; slurm_mutex_lock(&jobcomp_lock); if (user_id != cache_uid) { - user_info = getpwuid((uid_t) user_id); - if (user_info && user_info->pw_name[0]) - snprintf(cache_name, sizeof(cache_name), "%s", - user_info->pw_name); - else - snprintf(cache_name, sizeof(cache_name), "Unknown"); + uname = uid_to_string((uid_t) user_id); + snprintf(cache_name, sizeof(cache_name), "%s", uname); + xfree(uname); cache_uid = user_id; } ret_name = xstrdup(cache_name); @@ -179,18 +176,14 @@ static char *_get_user_name(uint32_t user_id) static char *_get_group_name(uint32_t group_id) { static uint32_t cache_gid = 0; - static char cache_name[32] = "root"; - struct group *group_info = NULL; + static char cache_name[32] = "root", *gname; char *ret_name = NULL; slurm_mutex_lock(&jobcomp_lock); if (group_id != cache_gid) { - group_info = getgrgid((gid_t) group_id); - if (group_info && group_info->gr_name[0]) - snprintf(cache_name, sizeof(cache_name), "%s", - group_info->gr_name); - else - snprintf(cache_name, sizeof(cache_name), "Unknown"); + gname = 
gid_to_string((gid_t) group_id); + snprintf(cache_name, sizeof(cache_name), "%s", gname); + xfree(gname); cache_gid = group_id; } ret_name = xstrdup(cache_name); diff --git a/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c b/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c index c9f9ad5b1..a4a3438f5 100644 --- a/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c +++ b/src/plugins/jobcomp/pgsql/jobcomp_pgsql.c @@ -42,6 +42,7 @@ #include <sys/types.h> #include "src/common/parse_time.h" #include "src/common/node_select.h" +#include "src/common/uid.h" /* * These variables are required by the generic plugin interface. If they @@ -178,18 +179,14 @@ static int _pgsql_jobcomp_check_tables(char *user) static char *_get_user_name(uint32_t user_id) { static uint32_t cache_uid = 0; - static char cache_name[32] = "root"; - struct passwd * user_info = NULL; + static char cache_name[32] = "root", *uname; char *ret_name = NULL; slurm_mutex_lock(&jobcomp_lock); if (user_id != cache_uid) { - user_info = getpwuid((uid_t) user_id); - if (user_info && user_info->pw_name[0]) - snprintf(cache_name, sizeof(cache_name), "%s", - user_info->pw_name); - else - snprintf(cache_name, sizeof(cache_name), "Unknown"); + uname = uid_to_string((uid_t) user_id); + snprintf(cache_name, sizeof(cache_name), "%s", uname); + xfree(uname); cache_uid = user_id; } ret_name = xstrdup(cache_name); @@ -202,18 +199,14 @@ static char *_get_user_name(uint32_t user_id) static char *_get_group_name(uint32_t group_id) { static uint32_t cache_gid = 0; - static char cache_name[32] = "root"; - struct group *group_info = NULL; + static char cache_name[32] = "root", *gname; char *ret_name = NULL; slurm_mutex_lock(&jobcomp_lock); if (group_id != cache_gid) { - group_info = getgrgid((gid_t) group_id); - if (group_info && group_info->gr_name[0]) - snprintf(cache_name, sizeof(cache_name), "%s", - group_info->gr_name); - else - snprintf(cache_name, sizeof(cache_name), "Unknown"); + gname = gid_to_string((gid_t) group_id); + snprintf(cache_name, 
sizeof(cache_name), "%s", gname); + xfree(gname); cache_gid = group_id; } ret_name = xstrdup(cache_name); diff --git a/src/plugins/sched/backfill/backfill_wrapper.c b/src/plugins/sched/backfill/backfill_wrapper.c index d2940cffe..ce23c5808 100644 --- a/src/plugins/sched/backfill/backfill_wrapper.c +++ b/src/plugins/sched/backfill/backfill_wrapper.c @@ -115,7 +115,7 @@ void fini( void ) verbose( "Backfill scheduler plugin shutting down" ); stop_backfill_agent(); _cancel_thread( backfill_thread ); - backfill_thread = false; + backfill_thread = 0; } pthread_mutex_unlock( &thread_flag_mutex ); } diff --git a/src/plugins/sched/wiki/get_jobs.c b/src/plugins/sched/wiki/get_jobs.c index 46dbce9a1..3989f614f 100644 --- a/src/plugins/sched/wiki/get_jobs.c +++ b/src/plugins/sched/wiki/get_jobs.c @@ -48,7 +48,6 @@ static char * _dump_all_jobs(int *job_cnt, time_t update_time); static char * _dump_job(struct job_record *job_ptr, time_t update_time); -static char * _get_group_name(gid_t gid); static uint16_t _get_job_cpus_per_task(struct job_record *job_ptr); static uint32_t _get_job_end_time(struct job_record *job_ptr); static char * _get_job_features(struct job_record *job_ptr); @@ -73,6 +72,7 @@ static char * _task_list(struct job_record *job_ptr); * ARG=<cnt>#<JOBID>; * STATE=<state>; Moab equivalent job state * [HOSTLIST=<node1:node2>;] list of required nodes, if any + * [STARTDATE=<uts>;] earliest start time, if any * [TASKLIST=<node1:node2>;] nodes in use, if running or completing * [RFEATURES=<features>;] required features, if any, * NOTE: OR operator not supported @@ -202,6 +202,7 @@ static char * _dump_all_jobs(int *job_cnt, time_t update_time) static char * _dump_job(struct job_record *job_ptr, time_t update_time) { char tmp[16384], *buf = NULL; + char *uname, *gname; uint32_t end_time, suspend_time; if (!job_ptr) @@ -213,17 +214,24 @@ static char * _dump_job(struct job_record *job_ptr, time_t update_time) if (update_time > last_job_update) return buf; - + if 
((job_ptr->job_state == JOB_PENDING) - && (job_ptr->details) - && (job_ptr->details->req_nodes) - && (job_ptr->details->req_nodes[0])) { - char *hosts = bitmap2wiki_node_name( - job_ptr->details->req_node_bitmap); - snprintf(tmp, sizeof(tmp), - "HOSTLIST=%s;", hosts); - xstrcat(buf, tmp); - xfree(hosts); + && (job_ptr->details)) { + if ((job_ptr->details->req_nodes) + && (job_ptr->details->req_nodes[0])) { + char *hosts = bitmap2wiki_node_name( + job_ptr->details->req_node_bitmap); + snprintf(tmp, sizeof(tmp), + "HOSTLIST=%s;", hosts); + xstrcat(buf, tmp); + xfree(hosts); + } + if (job_ptr->details->begin_time) { + snprintf(tmp, sizeof(tmp), + "STARTDATE=%u;", (uint32_t) + job_ptr->details->begin_time); + xstrcat(buf, tmp); + } } else if (!IS_JOB_FINISHED(job_ptr)) { char *hosts = _task_list(job_ptr); snprintf(tmp, sizeof(tmp), @@ -317,11 +325,13 @@ static char * _dump_job(struct job_record *job_ptr, time_t update_time) (update_time > job_ptr->details->submit_time)) return buf; + uname = uid_to_string((uid_t) job_ptr->user_id); + gname = gid_to_string(job_ptr->group_id); snprintf(tmp, sizeof(tmp), - "UNAME=%s;GNAME=%s;", - uid_to_string((uid_t) job_ptr->user_id), - _get_group_name(job_ptr->group_id)); + "UNAME=%s;GNAME=%s;", uname, gname); xstrcat(buf, tmp); + xfree(uname); + xfree(gname); return buf; } @@ -363,16 +373,6 @@ static uint32_t _get_job_min_nodes(struct job_record *job_ptr) return (uint32_t) 1; } -static char * _get_group_name(gid_t gid) -{ - struct group *grp; - - grp = getgrgid(gid); - if (grp) - return grp->gr_name; - return "nobody"; -} - static uint32_t _get_job_submit_time(struct job_record *job_ptr) { if (job_ptr->details) diff --git a/src/plugins/sched/wiki/msg.c b/src/plugins/sched/wiki/msg.c index 8f3ec8252..e0e18decf 100644 --- a/src/plugins/sched/wiki/msg.c +++ b/src/plugins/sched/wiki/msg.c @@ -658,14 +658,22 @@ static void _proc_msg(slurm_fd new_fd, char *msg) static void _send_reply(slurm_fd new_fd, char *response) { size_t i; - char 
*buf, sum[20]; + char *buf, sum[20], *tmp; + static char uname[64] = ""; i = strlen(response); i += 100; /* leave room for header */ buf = xmalloc(i); + if (uname[0] == '\0') { + tmp = uid_to_string(getuid()); + strncpy(uname, tmp, sizeof(uname)); + uname[sizeof(uname) - 1] = '\0'; + xfree(tmp); + } + snprintf(buf, i, "CK=dummy67890123456 TS=%u AUTH=%s DT=%s", - (uint32_t) time(NULL), uid_to_string(getuid()), response); + (uint32_t) time(NULL), uname, response); checksum(sum, auth_key, (buf+20)); /* overwrite "CK=dummy..." above */ memcpy(buf, sum, 19); diff --git a/src/plugins/sched/wiki2/get_jobs.c b/src/plugins/sched/wiki2/get_jobs.c index ed5d46d06..709b572ff 100644 --- a/src/plugins/sched/wiki2/get_jobs.c +++ b/src/plugins/sched/wiki2/get_jobs.c @@ -48,7 +48,6 @@ static char * _dump_all_jobs(int *job_cnt, time_t update_time); static char * _dump_job(struct job_record *job_ptr, time_t update_time); -static char * _get_group_name(gid_t gid); static void _get_job_comment(struct job_record *job_ptr, char *buffer, int buf_size); static uint16_t _get_job_cpus_per_task(struct job_record *job_ptr); @@ -232,6 +231,7 @@ static char * _dump_all_jobs(int *job_cnt, time_t update_time) static char * _dump_job(struct job_record *job_ptr, time_t update_time) { char tmp[16384], *buf = NULL; + char *uname, *gname; uint32_t end_time, suspend_time; int i, rej_sent = 0; @@ -367,11 +367,13 @@ static char * _dump_job(struct job_record *job_ptr, time_t update_time) (update_time > job_ptr->details->submit_time)) return buf; + uname = uid_to_string((uid_t) job_ptr->user_id); + gname = gid_to_string(job_ptr->group_id); snprintf(tmp, sizeof(tmp), - "UNAME=%s;GNAME=%s;", - uid_to_string((uid_t) job_ptr->user_id), - _get_group_name(job_ptr->group_id)); + "UNAME=%s;GNAME=%s;", uname, gname); xstrcat(buf, tmp); + xfree(uname); + xfree(gname); return buf; } @@ -469,16 +471,6 @@ static uint32_t _get_job_min_nodes(struct job_record *job_ptr) return (uint32_t) 1; } -static char * 
_get_group_name(gid_t gid) -{ - struct group *grp; - - grp = getgrgid(gid); - if (grp) - return grp->gr_name; - return "nobody"; -} - static uint32_t _get_job_submit_time(struct job_record *job_ptr) { if (job_ptr->details) diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c index 01cd635c6..159535ebc 100644 --- a/src/plugins/sched/wiki2/msg.c +++ b/src/plugins/sched/wiki2/msg.c @@ -719,14 +719,22 @@ static void _proc_msg(slurm_fd new_fd, char *msg) static void _send_reply(slurm_fd new_fd, char *response) { size_t i; - char *buf, sum[20]; + char *buf, sum[20], *tmp; + static char uname[64] = ""; i = strlen(response); i += 100; /* leave room for header */ buf = xmalloc(i); + if (uname[0] == '\0') { + tmp = uid_to_string(getuid()); + strncpy(uname, tmp, sizeof(uname)); + uname[sizeof(uname) - 1] = '\0'; + xfree(tmp); + } + snprintf(buf, i, "CK=dummy67890123456 TS=%u AUTH=%s DT=%s", - (uint32_t) time(NULL), uid_to_string(getuid()), response); + (uint32_t) time(NULL), uname, response); checksum(sum, auth_key, (buf+20)); /* overwrite "CK=dummy..." above */ memcpy(buf, sum, 19); diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index f210fec9e..9f0cea8a6 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -2,7 +2,7 @@ * bg_job_place.c - blue gene job placement (e.g. base block selection) * functions. * - * $Id: bg_job_place.c 14295 2008-06-19 23:58:28Z da $ + * $Id: bg_job_place.c 14660 2008-07-30 17:39:47Z jette $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -41,6 +41,7 @@ #include <pwd.h> #include "src/common/node_select.h" +#include "src/common/uid.h" #include "src/slurmctld/trigger_mgr.h" #include "bluegene.h" #include "dynamic_block.h" @@ -221,32 +222,21 @@ static int _bg_record_sort_aval_dec(bg_record_t* rec_a, bg_record_t* rec_b) static int _get_user_groups(uint32_t user_id, uint32_t group_id, gid_t *groups, int max_groups, int *ngroups) { - struct passwd pwd, *results; - char *buffer; - static size_t buf_size = 0; int rc; + char *user_name; - if (!buf_size && ((buf_size = sysconf(_SC_GETPW_R_SIZE_MAX)) < 0)) { - error("sysconf(_SC_GETPW_R_SIZE_MAX)"); - return -1; - } - buffer = xmalloc(buf_size); - rc = getpwuid_r((uid_t) user_id, &pwd, buffer, buf_size, &results); - if (rc != 0) { - error("getpwuid_r(%u): %m", user_id); - xfree(buffer); - return -1; - } + user_name = uid_to_string((uid_t) user_id); *ngroups = max_groups; - rc = getgrouplist(pwd.pw_name, (gid_t) group_id, groups, ngroups); - xfree(buffer); + rc = getgrouplist(user_name, (gid_t) group_id, groups, ngroups); if (rc < 0) { - error("getgrouplist(%s): %m", pwd.pw_name); - return -1; + error("getgrouplist(%s): %m", user_name); + rc = -1; + } else { + *ngroups = rc; + rc = 0; } - *ngroups = rc; - - return 0; + xfree(user_name); + return rc; } /* diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index 4f0e7383f..0e70c99fc 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -2,7 +2,7 @@ * bg_job_run.c - blue gene job execution (e.g. initiation and termination) * functions. * - * $Id: bg_job_run.c 13947 2008-04-29 19:35:34Z jette $ + * $Id: bg_job_run.c 14660 2008-07-30 17:39:47Z jette $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -233,8 +233,8 @@ static void _sync_agent(bg_update_t *bg_update_ptr) bg_update_ptr->job_ptr->job_id, bg_update_ptr->bg_block_id); xfree(bg_record->target_name); - bg_record->target_name = xstrdup( - uid_to_string(bg_update_ptr->job_ptr->user_id)); + bg_record->target_name = + uid_to_string(bg_update_ptr->job_ptr->user_id); set_user_rc = set_block_user(bg_record); slurm_mutex_unlock(&block_state_mutex); @@ -454,8 +454,8 @@ static void _start_agent(bg_update_t *bg_update_ptr) bg_record->boot_count = 0; xfree(bg_record->target_name); - bg_record->target_name = xstrdup( - uid_to_string(bg_update_ptr->job_ptr->user_id)); + bg_record->target_name = + uid_to_string(bg_update_ptr->job_ptr->user_id); debug("setting the target_name for Block %s to %s", bg_record->bg_block_id, bg_record->target_name); diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c index 42b472710..9d7d8c7d9 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -39,6 +39,7 @@ #include "bluegene.h" #include "dynamic_block.h" +#include "src/common/uid.h" #include "src/slurmctld/trigger_mgr.h" /* some local functions */ @@ -456,8 +457,6 @@ extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id) */ extern int update_block_user(bg_record_t *bg_record, int set) { - struct passwd *pw_ent = NULL; - if(!bg_record->target_name) { error("Must set target_name to run update_block_user."); return -1; @@ -500,13 +499,15 @@ extern int update_block_user(bg_record_t *bg_record, int set) #endif if(strcmp(bg_record->target_name, bg_record->user_name)) { + uid_t pw_uid; xfree(bg_record->user_name); bg_record->user_name = xstrdup(bg_record->target_name); - if((pw_ent = getpwnam(bg_record->user_name)) == NULL) { - error("getpwnam(%s): %m", bg_record->user_name); + pw_uid = uid_from_string(bg_record->user_name); + if(pw_uid == (uid_t) -1) { + 
error("No such user: %s", bg_record->user_name); return -1; } else { - bg_record->user_uid = pw_ent->pw_uid; + bg_record->user_uid = pw_uid; } return 1; } @@ -616,7 +617,7 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq) bg_record_t *found_record = NULL; ba_node_t *ba_node = NULL; ListIterator itr; - struct passwd *pw_ent = NULL; + uid_t pw_uid; int i, len; int small_size = 0; int small_count = 0; @@ -635,10 +636,11 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq) bg_record->target_name = xstrdup(slurmctld_conf.slurm_user_name); slurm_conf_unlock(); - if((pw_ent = getpwnam(bg_record->user_name)) == NULL) { - error("getpwnam(%s): %m", bg_record->user_name); + pw_uid = uid_from_string(bg_record->user_name); + if(pw_uid == (uid_t) -1) { + error("No such user: %s", bg_record->user_name); } else { - bg_record->user_uid = pw_ent->pw_uid; + bg_record->user_uid = pw_uid; } bg_record->bg_block_list = list_create(destroy_ba_node); diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c index eb67ba679..ce0da637c 100755 --- a/src/plugins/select/bluegene/plugin/block_sys.c +++ b/src/plugins/select/bluegene/plugin/block_sys.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * block_sys.c - component used for wiring up the blocks * - * $Id: block_sys.c 13960 2008-04-30 21:45:26Z jette $ + * $Id: block_sys.c 14691 2008-08-04 19:29:55Z jette $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -37,7 +37,7 @@ \*****************************************************************************/ #include "bluegene.h" - +#include "src/common/uid.h" /** these are used in the dynamic partitioning algorithm */ @@ -158,7 +158,7 @@ static int _post_allocate(bg_record_t *bg_record) #ifdef HAVE_BG_FILES int i; pm_partition_id_t block_id; - struct passwd *pw_ent = NULL; + uid_t my_uid; /* Add partition record to the DB */ debug2("adding block\n"); @@ -208,11 +208,12 @@ static int _post_allocate(bg_record_t *bg_record) bg_record->user_name = xstrdup(slurmctld_conf.slurm_user_name); slurm_conf_unlock(); - - if((pw_ent = getpwnam(bg_record->user_name)) == NULL) { - error("getpwnam(%s): %m", bg_record->user_name); + + my_uid = uid_from_string(bg_record->user_name); + if (my_uid == (uid_t) -1) { + error("uid_from_string(%s): %m", bg_record->user_name); } else { - bg_record->user_uid = pw_ent->pw_uid; + bg_record->user_uid = my_uid; } } /* We are done with the block */ @@ -378,7 +379,7 @@ int read_bg_blocks() rm_partition_t *block_ptr = NULL; char node_name_tmp[255], *user_name = NULL; bg_record_t *bg_record = NULL; - struct passwd *pw_ent = NULL; + uid_t my_uid; int *coord = NULL; int block_number, block_count; @@ -707,12 +708,12 @@ int read_bg_blocks() free(user_name); } - if((pw_ent = getpwnam(bg_record->user_name)) - == NULL) { - error("getpwnam(%s): %m", + my_uid = uid_from_string(bg_record->user_name); + if (my_uid == (uid_t) -1) { + error("uid_from_string(%s): %m", bg_record->user_name); } else { - bg_record->user_uid = pw_ent->pw_uid; + bg_record->user_uid = my_uid; } } diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 9135f35b1..fdca775d6 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * select_bluegene.c - node 
selection plugin for Blue Gene system. * - * $Id: select_bluegene.c 14295 2008-06-19 23:58:28Z da $ + * $Id: select_bluegene.c 14660 2008-07-30 17:39:47Z jette $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -43,6 +43,7 @@ #include "defined_block.h" #endif +#include "src/common/uid.h" #include "src/slurmctld/trigger_mgr.h" #include <fcntl.h> @@ -360,8 +361,8 @@ extern int select_p_state_restore(char *dir_name) int data_allocated, data_read = 0; char *ver_str = NULL; uint32_t ver_str_len; - struct passwd *pw_ent = NULL; int blocks = 0; + uid_t my_uid; debug("bluegene: select_p_state_restore"); #ifdef HAVE_BG_FILES @@ -542,12 +543,12 @@ extern int select_p_state_restore(char *dir_name) bg_record->user_name = xstrdup(slurmctld_conf.slurm_user_name); slurm_conf_unlock(); - if((pw_ent = getpwnam(bg_record->user_name)) - == NULL) { - error("getpwnam(%s): %m", + my_uid = uid_from_string(bg_record->user_name); + if (my_uid == (uid_t) -1) { + error("uid_from_strin(%s): %m", bg_record->user_name); } else { - bg_record->user_uid = pw_ent->pw_uid; + bg_record->user_uid = my_uid; } bg_record->blrtsimage = diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index 43c5a66bd..b5a132e06 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -2,7 +2,7 @@ * select_cons_res.c - node selection plugin supporting consumable * resources policies. 
* - * $Id: select_cons_res.c 14469 2008-07-09 18:15:23Z jette $ + * $Id: select_cons_res.c 14873 2008-08-25 18:19:31Z jette $ *****************************************************************************\ * * The following example below illustrates how four jobs are allocated @@ -1528,13 +1528,9 @@ extern int select_p_job_init(List job_list) suspend = 1; else suspend = 0; - if ((job->job_ptr->nodes == NULL) || - (node_name2bitmap(job->job_ptr->nodes, true, - &job->node_bitmap))) { - error("cons_res: job %u has no allocated nodes", - job->job_id); - job->node_bitmap = bit_alloc(node_record_count); - } + FREE_NULL_BITMAP(job->node_bitmap); + node_name2bitmap(job->job_ptr->nodes, true, + &job->node_bitmap); _add_job_to_nodes(job, "select_p_job_init", suspend); } list_iterator_destroy(iterator); diff --git a/src/sacct/options.c b/src/sacct/options.c index 8a924a70e..ed584204d 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -839,6 +839,10 @@ void parse_command_line(int argc, char **argv) break; case 'u': + if(!strcmp(optarg, "-1")) { + all_users = 1; + break; + } if(!params.opt_uid_list) params.opt_uid_list = list_create(slurm_destroy_char); diff --git a/src/sacct/print.c b/src/sacct/print.c index d1ff6e5f8..a1023454e 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -968,7 +968,7 @@ void print_vsize(type_t type, void *object) case JOB: sacct = job->sacct; nodes = job->nodes; - pos = sacct.min_cpu_id.nodeid; + pos = sacct.min_cpu_id.nodeid; convert_num_unit((float)sacct.max_vsize, buf1, sizeof(buf1), UNIT_KILO); if(job->track_steps) diff --git a/src/sacct/sacct_stat.c b/src/sacct/sacct_stat.c index 427fd1e82..0b8b67622 100644 --- a/src/sacct/sacct_stat.c +++ b/src/sacct/sacct_stat.c @@ -86,7 +86,7 @@ int _sacct_query(slurm_step_layout_t *step_layout, uint32_t job_id, msg.data = &r; - ret_list = slurm_send_recv_msgs(step_layout->node_list, &msg, 0); + ret_list = slurm_send_recv_msgs(step_layout->node_list, &msg, 0, false); if (!ret_list) { 
error("got an error no list returned"); goto cleanup; diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index 6a83719b5..8a4aa3797 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -84,7 +84,7 @@ static int _set_cond(int *start, int argc, char *argv[], } if(slurm_addto_char_list( acct_cond->assoc_cond->acct_list, - argv[i]+end)) + argv[i]+end)) u_set = 1; } else if (!strncasecmp (argv[i], "Clusters", 1)) { if(!acct_cond->assoc_cond->cluster_list) { @@ -146,7 +146,9 @@ static int _set_cond(int *start, int argc, char *argv[], (*start) = i; - if(a_set) + if(u_set && a_set) + return 3; + else if(a_set) return 2; else if(u_set) return 1; @@ -696,7 +698,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) acct_account_cond_t *acct_cond = xmalloc(sizeof(acct_account_cond_t)); List acct_list; - int i=0; + int i=0, set=0; ListIterator itr = NULL; ListIterator itr2 = NULL; acct_account_rec_t *acct = NULL; @@ -704,6 +706,8 @@ extern int sacctmgr_list_account(int argc, char *argv[]) char *object; List qos_list = NULL; + int field_count = 0; + print_field_t *field = NULL; List format_list = list_create(slurm_destroy_char); @@ -731,7 +735,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) acct_cond->with_assocs = with_assoc_flag; - _set_cond(&i, argc, argv, acct_cond, format_list); + set = _set_cond(&i, argc, argv, acct_cond, format_list); if(exit_code) { destroy_acct_account_cond(acct_cond); @@ -747,13 +751,25 @@ extern int sacctmgr_list_account(int argc, char *argv[]) slurm_addto_char_list(format_list, "Coord"); } + + if(!acct_cond->with_assocs && set > 1) { + if(!commit_check("You requested options that are only vaild " + "when querying with the withassoc option.\n" + "Are you sure you want to continue?")) { + printf("Aborted\n"); + list_destroy(format_list); + destroy_acct_account_cond(acct_cond); + return SLURM_SUCCESS; + } + } print_fields_list = list_create(destroy_print_field); itr = 
list_iterator_create(format_list); while((object = list_next(itr))) { field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("Account", object, 1)) { + if(!strncasecmp("Account", object, 1) + || !strncasecmp("Name", object, 2)) { field->type = PRINT_ACCOUNT; field->name = xstrdup("Account"); field->len = 10; @@ -864,65 +880,89 @@ extern int sacctmgr_list_account(int argc, char *argv[]) itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); + field_count = list_count(print_fields_list); + while((acct = list_next(itr))) { - if(acct->assoc_list && list_count(acct->assoc_list)) { + if(acct->assoc_list) { ListIterator itr3 = list_iterator_create(acct->assoc_list); - while((assoc = list_next(itr3))) { + int curr_inx = 1; while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - field, acct->name); + field, acct->name, + (curr_inx == + field_count)); break; case PRINT_CLUSTER: field->print_routine( - field, assoc->cluster); + field, assoc->cluster, + (curr_inx == + field_count)); break; case PRINT_COORDS: field->print_routine( field, - acct->coordinators); + acct->coordinators, + (curr_inx == + field_count)); break; case PRINT_DESC: field->print_routine( field, - acct->description); + acct->description, + (curr_inx == + field_count)); break; case PRINT_FAIRSHARE: field->print_routine( field, - assoc->fairshare); + assoc->fairshare, + (curr_inx == + field_count)); break; case PRINT_ID: field->print_routine( - field, assoc->id); + field, assoc->id, + (curr_inx == + field_count)); break; case PRINT_MAXC: field->print_routine( field, assoc-> - max_cpu_secs_per_job); + max_cpu_secs_per_job, + (curr_inx == + field_count)); break; case PRINT_MAXJ: field->print_routine( - field, assoc->max_jobs); + field, assoc->max_jobs, + (curr_inx == + field_count)); break; case PRINT_MAXN: field->print_routine( field, assoc-> - max_nodes_per_job); + max_nodes_per_job, + (curr_inx == + field_count)); break; case 
PRINT_MAXW: field->print_routine( field, assoc-> - max_wall_duration_per_job); + max_wall_duration_per_job, + (curr_inx == + field_count)); break; case PRINT_ORG: field->print_routine( field, - acct->organization); + acct->organization, + (curr_inx == + field_count)); break; case PRINT_QOS: if(!qos_list) { @@ -934,7 +974,9 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field->print_routine( field, qos_list, - acct->qos_list); + acct->qos_list, + (curr_inx == + field_count)); break; case PRINT_QOS_RAW: if(!qos_list) { @@ -946,82 +988,116 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field->print_routine( field, qos_list, - acct->qos_list); + acct->qos_list, + (curr_inx == + field_count)); break; case PRINT_PID: field->print_routine( field, - assoc->parent_id); + assoc->parent_id, + (curr_inx == + field_count)); break; case PRINT_PNAME: field->print_routine( field, - assoc->parent_acct); + assoc->parent_acct, + (curr_inx == + field_count)); break; case PRINT_PART: field->print_routine( field, - assoc->partition); + assoc->partition, + (curr_inx == + field_count)); break; case PRINT_USER: field->print_routine( - field, assoc->user); + field, assoc->user, + (curr_inx == + field_count)); break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); } list_iterator_destroy(itr3); } else { + int curr_inx = 1; while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - field, acct->name); + field, acct->name, + (curr_inx == + field_count)); break; case PRINT_CLUSTER: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_COORDS: field->print_routine( field, - acct->coordinators); + acct->coordinators, + (curr_inx == + field_count)); break; case PRINT_DESC: field->print_routine( - field, acct->description); + field, acct->description, + (curr_inx == + field_count)); break; case PRINT_FAIRSHARE: field->print_routine( - field, NULL); + field, 
NULL, + (curr_inx == + field_count)); break; case PRINT_ID: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_MAXC: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_MAXJ: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_MAXN: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_MAXW: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_ORG: field->print_routine( - field, acct->organization); + field, acct->organization, + (curr_inx == + field_count)); break; case PRINT_QOS: if(!qos_list) { @@ -1032,7 +1108,9 @@ extern int sacctmgr_list_account(int argc, char *argv[]) } field->print_routine( field, qos_list, - acct->qos_list); + acct->qos_list, + (curr_inx == + field_count)); break; case PRINT_QOS_RAW: if(!qos_list) { @@ -1043,27 +1121,38 @@ extern int sacctmgr_list_account(int argc, char *argv[]) } field->print_routine( field, qos_list, - acct->qos_list); + acct->qos_list, + (curr_inx == + field_count)); break; case PRINT_PID: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_PNAME: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_PART: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; case PRINT_USER: field->print_routine( - field, NULL); + field, NULL, + (curr_inx == + field_count)); break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); @@ -1129,6 +1218,10 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) notice_thread_init(); if(rec_set == 3 || rec_set == 1) { // process the account changes if(cond_set == 2) { + exit_code=1; + fprintf(stderr, + " There was a problem with your " + "'where' options.\n"); rc = SLURM_ERROR; 
goto assoc_start; } @@ -1158,6 +1251,15 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) assoc_start: if(rec_set == 3 || rec_set == 2) { // process the association changes + if(cond_set == 1 && !acct_cond->assoc_cond->acct_list) { + rc = SLURM_ERROR; + exit_code=1; + fprintf(stderr, + " There was a problem with your " + "'where' options.\n"); + goto assoc_end; + } + ret_list = acct_storage_g_modify_associations( db_conn, my_uid, acct_cond->assoc_cond, assoc); @@ -1182,6 +1284,8 @@ assoc_start: list_destroy(ret_list); } +assoc_end: + notice_thread_fini(); if(set) { if(commit_check("Would you like to commit changes?")) @@ -1243,7 +1347,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) if(set == 1) { ret_list = acct_storage_g_remove_accounts( db_conn, my_uid, acct_cond); - } else if(set == 2) { + } else if(set == 2 || set == 3) { ret_list = acct_storage_g_remove_associations( db_conn, my_uid, acct_cond->assoc_cond); } @@ -1255,7 +1359,7 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) ListIterator itr = list_iterator_create(ret_list); if(set == 1) { printf(" Deleting accounts...\n"); - } else if(set == 2) { + } else if(set == 2 || set == 3) { printf(" Deleting account associations...\n"); } while((object = list_next(itr))) { diff --git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c index 461a19fdb..90b739a2e 100644 --- a/src/sacctmgr/association_functions.c +++ b/src/sacctmgr/association_functions.c @@ -173,6 +173,7 @@ static int _set_cond(int *start, int argc, char *argv[], argv[i]+end); set = 1; } else if (!strncasecmp (argv[i], "Parent", 4)) { + xfree(association_cond->parent_acct); association_cond->parent_acct = strip_quotes(argv[i]+end, NULL); set = 1; @@ -197,6 +198,14 @@ static int _sort_childern_list(sacctmgr_assoc_t *assoc_a, sacctmgr_assoc_t *assoc_b) { int diff = 0; + + /* first just check the lfts and rgts if a lft is inside of the + * others lft and rgt just return it is less + */ + 
if(assoc_a->assoc->lft > assoc_b->assoc->lft + && assoc_a->assoc->lft < assoc_b->assoc->rgt) + return 1; + /* check to see if this is a user association or an account. * We want the accounts at the bottom */ @@ -370,7 +379,7 @@ extern int sacctmgr_list_association(int argc, char *argv[]) return SLURM_ERROR; } else if(!list_count(format_list)) slurm_addto_char_list(format_list, - "C,A,U,F,MaxC,MaxJ,MaxN,MaxW"); + "C,A,U,Part,F,MaxC,MaxJ,MaxN,MaxW"); print_fields_list = list_create(destroy_print_field); @@ -405,7 +414,8 @@ extern int sacctmgr_list_association(int argc, char *argv[]) field->name = xstrdup("LFT"); field->len = 6; field->print_routine = print_fields_uint; - } else if(!strncasecmp("MaxCPUSecs", object, 4)) { + } else if(!strncasecmp("MaxCPUSecs", object, 4) + || !strncasecmp("MaxProcSecsPerJob", object, 4)) { field->type = PRINT_MAXC; field->name = xstrdup("MaxCPUSecs"); field->len = 11; diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index 3a39ddb32..1eeea8540 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -54,7 +54,8 @@ static int _set_cond(int *start, int argc, char *argv[], break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end || !strncasecmp (argv[i], "Names", 1)) { + } else if(!end || !strncasecmp (argv[i], "Names", 1) + || !strncasecmp (argv[i], "Clusters", 1)) { if(cluster_list) { if(slurm_addto_char_list(cluster_list, argv[i]+end)) @@ -329,6 +330,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) acct_cluster_rec_t *cluster = NULL; char *object; + int field_count = 0; + print_field_t *field = NULL; List format_list = list_create(slurm_destroy_char); @@ -365,7 +368,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) itr = list_iterator_create(format_list); while((object = list_next(itr))) { field = xmalloc(sizeof(print_field_t)); - if(!strncasecmp("Cluster", object, 2)) { + if(!strncasecmp("Cluster", object, 2) + || 
!strncasecmp("Name", object, 2)) { field->type = PRINT_CLUSTER; field->name = xstrdup("Cluster"); field->len = 10; @@ -436,50 +440,62 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); + field_count = list_count(print_fields_list); + while((cluster = list_next(itr))) { + int curr_inx = 1; while((field = list_next(itr2))) { switch(field->type) { case PRINT_CLUSTER: field->print_routine(field, - cluster->name); + cluster->name, + (curr_inx == field_count)); break; case PRINT_CHOST: field->print_routine(field, - cluster->control_host); + cluster->control_host, + (curr_inx == field_count)); break; case PRINT_CPORT: field->print_routine(field, - cluster->control_port); + cluster->control_port, + (curr_inx == field_count)); break; case PRINT_FAIRSHARE: field->print_routine( field, - cluster->default_fairshare); + cluster->default_fairshare, + (curr_inx == field_count)); break; case PRINT_MAXC: field->print_routine( field, - cluster->default_max_cpu_secs_per_job); + cluster->default_max_cpu_secs_per_job, + (curr_inx == field_count)); break; case PRINT_MAXJ: field->print_routine( field, - cluster->default_max_jobs); + cluster->default_max_jobs, + (curr_inx == field_count)); break; case PRINT_MAXN: field->print_routine( field, - cluster->default_max_nodes_per_job); + cluster->default_max_nodes_per_job, + (curr_inx == field_count)); break; case PRINT_MAXW: field->print_routine( field, cluster-> - default_max_wall_duration_per_job); + default_max_wall_duration_per_job, + (curr_inx == field_count)); break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c index d043fcdf9..0f5d2db96 100644 --- a/src/sacctmgr/common.c +++ b/src/sacctmgr/common.c @@ -46,8 +46,8 @@ static pthread_t lock_warning_thread; static void *_print_lock_warn(void *no_data) { - sleep(2); - printf(" Waiting for lock from other 
user.\n"); + sleep(5); + printf(" Database is busy or waiting for lock from other user.\n"); return NULL; } @@ -132,6 +132,12 @@ extern char *strip_quotes(char *option, int *increased) break; } else if(option[i] == '\"' || option[i] == '\'') option[i] = '`'; + else { + char lower = tolower(option[i]); + if(lower != option[i]) + option[i] = lower; + } + i++; } end += i; @@ -393,14 +399,19 @@ extern acct_association_rec_t *sacctmgr_find_association_from_list( itr = list_iterator_create(assoc_list); while((assoc = list_next(itr))) { - if((user && (!assoc->user || strcasecmp(user, assoc->user))) - || (account && (!assoc->acct - || strcasecmp(account, assoc->acct))) - || (cluster && (!assoc->cluster - || strcasecmp(cluster, assoc->cluster))) - || (partition && (!assoc->partition - || strcasecmp(partition, - assoc->partition)))) + if(((!user && assoc->user) + || (user && (!assoc->user + || strcasecmp(user, assoc->user)))) + || ((!account && assoc->acct) + || (account && (!assoc->acct + || strcasecmp(account, assoc->acct)))) + || ((!cluster && assoc->cluster) + || (cluster && (!assoc->cluster + || strcasecmp(cluster, assoc->cluster)))) + || ((!partition && assoc->partition) + || (partition && (!assoc->partition + || strcasecmp(partition, + assoc->partition))))) continue; break; } @@ -644,7 +655,8 @@ end_it: return count; } -extern void sacctmgr_print_coord_list(print_field_t *field, List value) +extern void sacctmgr_print_coord_list( + print_field_t *field, List value, int last) { ListIterator itr = NULL; char *print_this = NULL; @@ -668,7 +680,10 @@ extern void sacctmgr_print_coord_list(print_field_t *field, List value) list_iterator_destroy(itr); } - if(print_fields_parsable_print) + if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", print_this); + else if(print_fields_parsable_print) printf("%s|", print_this); else { if(strlen(print_this) > field->len) @@ -680,13 +695,16 @@ extern void sacctmgr_print_coord_list(print_field_t 
*field, List value) } extern void sacctmgr_print_qos_list(print_field_t *field, List qos_list, - List value) + List value, int last) { char *print_this = NULL; print_this = get_qos_complete_str(qos_list, value); - if(print_fields_parsable_print) + if(print_fields_parsable_print == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", print_this); + else if(print_fields_parsable_print) printf("%s|", print_this); else { if(strlen(print_this) > field->len) diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index 4e3b82edf..f85e6198e 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -211,7 +211,7 @@ static sacctmgr_file_opts_t *_parse_options(char *options) file_opts->max_jobs = INFINITE; file_opts->max_nodes_per_job = INFINITE; file_opts->max_wall_duration_per_job = INFINITE; - file_opts->admin = ACCT_ADMIN_NONE; + file_opts->admin = ACCT_ADMIN_NOTSET; while(options[i]) { quote = 0; @@ -594,46 +594,56 @@ static int _mod_cluster(sacctmgr_file_opts_t *file_opts, if(cluster->default_fairshare != file_opts->fairshare) { mod_assoc.fairshare = file_opts->fairshare; changed = 1; - xstrfmtcat(my_info, " Changed Cluster default for " - "fairshare from %d -> %d\n", - cluster->default_fairshare, - file_opts->fairshare); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed fairshare", "Cluster", + cluster->name, + cluster->default_fairshare, + file_opts->fairshare); } if(cluster->default_max_cpu_secs_per_job != file_opts->max_cpu_secs_per_job) { mod_assoc.max_cpu_secs_per_job = file_opts->max_cpu_secs_per_job; changed = 1; - printf(" Changed Cluster default for " - "MaxCPUSecsPerJob from %d -> %d\n", - cluster->default_max_cpu_secs_per_job, - file_opts->max_cpu_secs_per_job); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxCPUSecsPerJob", "Cluster", + cluster->name, + cluster->default_max_cpu_secs_per_job, + file_opts->max_cpu_secs_per_job); } 
if(cluster->default_max_jobs != file_opts->max_jobs) { mod_assoc.max_jobs = file_opts->max_jobs; changed = 1; - printf(" Changed Cluster default for " - "MaxJobs from %d -> %d\n", - cluster->default_max_jobs, - file_opts->max_jobs); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxJobs", "Cluster", + cluster->name, + cluster->default_max_jobs, + file_opts->max_jobs); } if(cluster->default_max_nodes_per_job != file_opts->max_nodes_per_job) { mod_assoc.max_nodes_per_job = file_opts->max_nodes_per_job; changed = 1; - printf(" Changed Cluster default for " - "MaxNodesPerJob from %d -> %d\n", - cluster->default_max_nodes_per_job, - file_opts->max_nodes_per_job); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxNodesPerJob", "Cluster", + cluster->name, + cluster->default_max_nodes_per_job, + file_opts->max_nodes_per_job); } if(cluster->default_max_wall_duration_per_job != file_opts->max_wall_duration_per_job) { mod_assoc.max_wall_duration_per_job = file_opts->max_wall_duration_per_job; changed = 1; - printf(" Changed Cluster default for " - "MaxWallDurationPerJob from %d -> %d\n", - cluster->default_max_wall_duration_per_job, - file_opts->max_wall_duration_per_job); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxWallDurationPerJob", "Cluster", + cluster->name, + cluster->default_max_wall_duration_per_job, + file_opts->max_wall_duration_per_job); } if(changed) { @@ -666,9 +676,11 @@ static int _mod_cluster(sacctmgr_file_opts_t *file_opts, /* } */ if(ret_list) { + printf("%s", my_info); list_destroy(ret_list); } else changed = 0; + xfree(my_info); } return changed; @@ -678,7 +690,7 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, acct_account_rec_t *acct, char *parent) { int changed = 0; - char *desc = NULL, *org = NULL; + char *desc = NULL, *org = NULL, *my_info = NULL; acct_account_rec_t mod_acct; acct_account_cond_t acct_cond; acct_association_cond_t 
assoc_cond; @@ -691,11 +703,12 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, desc = xstrdup(file_opts->desc); if(desc && strcmp(desc, acct->description)) { - printf(" Changed description for account " - "'%s' from '%s' to '%s'\n", - acct->name, - acct->description, - desc); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8s -> %s\n", + " Changed description", "Account", + acct->name, + acct->description, + desc); mod_acct.description = desc; changed = 1; } else @@ -705,11 +718,12 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, org = xstrdup(file_opts->org); if(org && strcmp(org, acct->organization)) { - printf(" Changed organization for account '%s' " - "from '%s' to '%s'\n", - acct->name, - acct->organization, - org); + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8s -> %s\n", + " Changed organization", "Account", + acct->name, + acct->organization, + org); mod_acct.organization = org; changed = 1; } else @@ -739,9 +753,10 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, new_qos = get_qos_complete_str(qos_list, mod_acct.qos_list); if(new_qos) { - printf(" Adding QOS for account '%s' '%s'\n", - acct->name, - new_qos); + xstrfmtcat(my_info, + " Adding QOS for account '%s' '%s'\n", + acct->name, + new_qos); xfree(new_qos); changed = 1; } else { @@ -753,9 +768,10 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, file_opts->qos_list); if(new_qos) { - printf(" Adding QOS for account '%s' '%s'\n", - acct->name, - new_qos); + xstrfmtcat(my_info, + " Adding QOS for account '%s' '%s'\n", + acct->name, + new_qos); xfree(new_qos); mod_acct.qos_list = file_opts->qos_list; file_opts->qos_list = NULL; @@ -792,9 +808,11 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, /* } */ if(ret_list) { + printf("%s", my_info); list_destroy(ret_list); } else changed = 0; + xfree(my_info); } xfree(desc); xfree(org); @@ -807,12 +825,16 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, int rc; int set = 0; int changed = 0; - char 
*def_acct = NULL; + char *def_acct = NULL, *my_info = NULL; acct_user_rec_t mod_user; acct_user_cond_t user_cond; List ret_list = NULL; acct_association_cond_t assoc_cond; - + + if(!user || !user->name) { + fatal(" We need a user name in _mod_user"); + } + memset(&mod_user, 0, sizeof(acct_user_rec_t)); memset(&user_cond, 0, sizeof(acct_user_cond_t)); memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); @@ -824,12 +846,14 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, if(file_opts->def_acct) def_acct = xstrdup(file_opts->def_acct); - if(def_acct && strcmp(def_acct, user->default_acct)) { - printf(" Changed User '%s' " - "default account '%s' -> '%s'\n", - user->name, - user->default_acct, - def_acct); + if(def_acct && + (!user->default_acct || strcmp(def_acct, user->default_acct))) { + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8s -> %s\n", + " Changed Default Account", "User", + user->name, + user->default_acct, + def_acct); mod_user.default_acct = def_acct; changed = 1; } else @@ -859,9 +883,10 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, new_qos = get_qos_complete_str(qos_list, mod_user.qos_list); if(new_qos) { - printf(" Adding QOS for user '%s' '%s'\n", - user->name, - new_qos); + xstrfmtcat(my_info, + " Adding QOS for user '%s' '%s'\n", + user->name, + new_qos); xfree(new_qos); changed = 1; } else @@ -872,9 +897,10 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, file_opts->qos_list); if(new_qos) { - printf(" Adding QOS for user '%s' '%s'\n", - user->name, - new_qos); + xstrfmtcat(my_info, + " Adding QOS for user '%s' '%s'\n", + user->name, + new_qos); xfree(new_qos); mod_user.qos_list = file_opts->qos_list; file_opts->qos_list = NULL; @@ -882,14 +908,17 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, } } - if(user->admin_level != file_opts->admin) { - printf(" Changed User '%s' " - "AdminLevel '%s' -> '%s'\n", - user->name, - acct_admin_level_str( - user->admin_level), - acct_admin_level_str( - 
file_opts->admin)); + if(user->admin_level != ACCT_ADMIN_NOTSET + && file_opts->admin != ACCT_ADMIN_NOTSET + && user->admin_level != file_opts->admin) { + xstrfmtcat(my_info, + "%-30.30s for %-7.7s %-10.10s %8s -> %s\n", + " Changed Admin Level", "User", + user->name, + acct_admin_level_str( + user->admin_level), + acct_admin_level_str( + file_opts->admin)); mod_user.admin_level = file_opts->admin; changed = 1; } @@ -916,9 +945,11 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, /* } */ if(ret_list) { + printf("%s", my_info); list_destroy(ret_list); set = 1; } + xfree(my_info); } xfree(def_acct); @@ -1052,7 +1083,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, mod_assoc.fairshare = file_opts->fairshare; changed = 1; xstrfmtcat(my_info, - " Changed fairshare for %s '%s' from %d to %d\n", + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed fairshare", type, name, assoc->fairshare, file_opts->fairshare); @@ -1062,8 +1094,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->max_cpu_secs_per_job; changed = 1; xstrfmtcat(my_info, - " Changed MaxCPUSecsPerJob for %s " - "'%s' from %d to %d\n", + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxCPUSecsPerJob", type, name, assoc->max_cpu_secs_per_job, file_opts->max_cpu_secs_per_job); @@ -1072,7 +1104,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, mod_assoc.max_jobs = file_opts->max_jobs; changed = 1; xstrfmtcat(my_info, - " Changed MaxJobs for %s '%s' from %d to %d\n", + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxJobs", type, name, assoc->max_jobs, file_opts->max_jobs); @@ -1081,8 +1114,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, mod_assoc.max_nodes_per_job = file_opts->max_nodes_per_job; changed = 1; xstrfmtcat(my_info, - " Changed MaxNodesPerJob for %s '%s' " - "from %d to %d\n", + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxNodesPerJob", type, name, assoc->max_nodes_per_job, file_opts->max_nodes_per_job); @@ 
-1093,8 +1126,8 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, file_opts->max_wall_duration_per_job; changed = 1; xstrfmtcat(my_info, - " Changed MaxWallDurationPerJob for %s '%s' " - "from %d to %d\n", + "%-30.30s for %-7.7s %-10.10s %8d -> %d\n", + " Changed MaxWallDurationPerJob", type, name, assoc->max_wall_duration_per_job, file_opts->max_wall_duration_per_job); @@ -1156,6 +1189,80 @@ static int _mod_assoc(sacctmgr_file_opts_t *file_opts, return changed; } +static acct_user_rec_t *_set_user_up(sacctmgr_file_opts_t *file_opts, + char *parent) +{ + acct_user_rec_t *user = xmalloc(sizeof(acct_user_rec_t)); + + user->assoc_list = NULL; + user->name = xstrdup(file_opts->name); + + if(file_opts->def_acct) + user->default_acct = xstrdup(file_opts->def_acct); + else + user->default_acct = xstrdup(parent); + + user->qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; + user->admin_level = file_opts->admin; + + if(file_opts->coord_list) { + acct_user_cond_t user_cond; + acct_association_cond_t assoc_cond; + ListIterator coord_itr = NULL; + char *temp_char = NULL; + acct_coord_rec_t *coord = NULL; + + memset(&user_cond, 0, sizeof(acct_user_cond_t)); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.user_list = list_create(NULL); + list_append(assoc_cond.user_list, user->name); + user_cond.assoc_cond = &assoc_cond; + + notice_thread_init(); + acct_storage_g_add_coord(db_conn, my_uid, + file_opts->coord_list, + &user_cond); + notice_thread_fini(); + list_destroy(assoc_cond.user_list); + user->coord_accts = list_create(destroy_acct_coord_rec); + coord_itr = list_iterator_create(file_opts->coord_list); + while((temp_char = list_next(coord_itr))) { + coord = xmalloc(sizeof(acct_coord_rec_t)); + coord->name = xstrdup(temp_char); + coord->direct = 1; + list_push(user->coord_accts, coord); + } + list_iterator_destroy(coord_itr); + } + return user; +} + + +static acct_account_rec_t *_set_acct_up(sacctmgr_file_opts_t *file_opts, + char 
*parent) +{ + acct_account_rec_t *acct = xmalloc(sizeof(acct_account_rec_t)); + acct->assoc_list = NULL; + acct->name = xstrdup(file_opts->name); + if(file_opts->desc) + acct->description = xstrdup(file_opts->desc); + else + acct->description = xstrdup(file_opts->name); + if(file_opts->org) + acct->organization = xstrdup(file_opts->org); + else if(strcmp(parent, "root")) + acct->organization = xstrdup(parent); + else + acct->organization = xstrdup(file_opts->name); + /* info("adding acct %s (%s) (%s)", */ +/* acct->name, acct->description, */ +/* acct->organization); */ + acct->qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; + + return acct; +} static int _print_file_sacctmgr_assoc_childern(FILE *fd, List sacctmgr_assoc_list, @@ -1488,7 +1595,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) if(rc != SLURM_SUCCESS) { exit_code=1; fprintf(stderr, - " Problem adding machine\n"); + " Problem adding cluster\n"); rc = SLURM_ERROR; _destroy_sacctmgr_file_opts(file_opts); break; @@ -1575,28 +1682,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) curr_acct_list, file_opts->name)) && !sacctmgr_find_account_from_list( acct_list, file_opts->name)) { - acct = xmalloc(sizeof(acct_account_rec_t)); - acct->assoc_list = NULL; - acct->name = xstrdup(file_opts->name); - if(file_opts->desc) - acct->description = - xstrdup(file_opts->desc); - else - acct->description = - xstrdup(file_opts->name); - if(file_opts->org) - acct->organization = - xstrdup(file_opts->org); - else if(strcmp(parent, "root")) - acct->organization = xstrdup(parent); - else - acct->organization = - xstrdup(file_opts->name); - /* info("adding acct %s (%s) (%s)", */ -/* acct->name, acct->description, */ -/* acct->organization); */ - acct->qos_list = file_opts->qos_list; - file_opts->qos_list = NULL; + acct = _set_acct_up(file_opts, parent); list_append(acct_list, acct); /* don't add anything to the curr_acct_list */ @@ -1696,65 +1782,13 @@ extern void 
load_sacctmgr_cfg_file (int argc, char *argv[]) rc = SLURM_ERROR; break; } + if(!(user = sacctmgr_find_user_from_list( curr_user_list, file_opts->name)) && !sacctmgr_find_user_from_list( user_list, file_opts->name)) { - user = xmalloc(sizeof(acct_user_rec_t)); - user->assoc_list = NULL; - user->name = xstrdup(file_opts->name); - if(file_opts->def_acct) - user->default_acct = - xstrdup(file_opts->def_acct); - else - user->default_acct = xstrdup(parent); - - user->qos_list = file_opts->qos_list; - file_opts->qos_list = NULL; - user->admin_level = file_opts->admin; - - if(file_opts->coord_list) { - acct_user_cond_t user_cond; - acct_association_cond_t assoc_cond; - ListIterator coord_itr = NULL; - char *temp_char = NULL; - acct_coord_rec_t *coord = NULL; - memset(&user_cond, 0, - sizeof(acct_user_cond_t)); - memset(&assoc_cond, 0, - sizeof(acct_association_cond_t)); - assoc_cond.user_list = - list_create(NULL); - list_append(assoc_cond.user_list, - user->name); - user_cond.assoc_cond = &assoc_cond; - - notice_thread_init(); - rc = acct_storage_g_add_coord( - db_conn, my_uid, - file_opts->coord_list, - &user_cond); - notice_thread_fini(); - list_destroy(assoc_cond.user_list); - user->coord_accts = list_create( - destroy_acct_coord_rec); - coord_itr = list_iterator_create( - file_opts->coord_list); - while((temp_char = - list_next(coord_itr))) { - coord = xmalloc( - sizeof - (acct_coord_rec_t)); - coord->name = - xstrdup(temp_char); - coord->direct = 1; - list_push(user->coord_accts, - coord); - } - list_iterator_destroy(coord_itr); - } - + user = _set_user_up(file_opts, parent); list_append(user_list, user); /* don't add anything to the curr_user_list */ @@ -1785,6 +1819,17 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts->name, parent, cluster_name, file_opts->part)) { + + /* This means the user was added + * during this round but this is a new + * association we are adding + */ + if(!user) + goto new_association; + + /* This means there could 
be a change + * on the user. + */ user2 = sacctmgr_find_user_from_list( mod_user_list, file_opts->name); if(!user2) { @@ -1797,7 +1842,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) } else { debug2("already modified this user"); } - + new_association: assoc = xmalloc(sizeof(acct_association_rec_t)); assoc->acct = xstrdup(parent); assoc->cluster = xstrdup(cluster_name); diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index fe53cf9a0..76eb21cc0 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -41,16 +41,15 @@ #include "src/sacctmgr/sacctmgr.h" #include "src/common/xsignal.h" -#define OPT_LONG_HIDE 0x102 #define BUFFER_SIZE 4096 char *command_name; -int all_flag; /* display even hidden partitions */ int exit_code; /* sacctmgr's exit code, =1 on any error at any time */ int exit_flag; /* program to terminate if =1 */ int input_words; /* number of words of input permitted */ int one_liner; /* one record per line if =1 */ int quiet_flag; /* quiet=1, verbose=-1, normal=0 */ +int readonly_flag; /* make it so you can only run list commands */ int verbosity; /* count of -v options */ int rollback_flag; /* immediate execute=1, else = 0 */ int with_assoc_flag = 0; @@ -76,14 +75,14 @@ main (int argc, char *argv[]) int option_index; static struct option long_options[] = { - {"all", 0, 0, 'a'}, {"help", 0, 0, 'h'}, - {"hide", 0, 0, OPT_LONG_HIDE}, {"immediate",0, 0, 'i'}, {"oneliner", 0, 0, 'o'}, {"no_header", 0, 0, 'n'}, {"parsable", 0, 0, 'p'}, + {"parsable2", 0, 0, 'P'}, {"quiet", 0, 0, 'q'}, + {"readonly", 0, 0, 'r'}, {"associations", 0, 0, 's'}, {"usage", 0, 0, 'h'}, {"verbose", 0, 0, 'v'}, @@ -92,19 +91,16 @@ main (int argc, char *argv[]) }; command_name = argv[0]; - all_flag = 0; rollback_flag = 1; exit_code = 0; exit_flag = 0; input_field_count = 0; quiet_flag = 0; + readonly_flag = 0; verbosity = 0; log_init("sacctmgr", opts, SYSLOG_FACILITY_DAEMON, NULL); - if (getenv ("SACCTMGR_ALL")) - all_flag= 1; - - 
while((opt_char = getopt_long(argc, argv, "ahionpqsvV", + while((opt_char = getopt_long(argc, argv, "hionpPqrsvV", long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': @@ -112,16 +108,10 @@ main (int argc, char *argv[]) "for more information\n"); exit(1); break; - case (int)'a': - all_flag = 1; - break; case (int)'h': _usage (); exit(exit_code); break; - case OPT_LONG_HIDE: - all_flag = 0; - break; case (int)'i': rollback_flag = 0; break; @@ -132,11 +122,19 @@ main (int argc, char *argv[]) print_fields_have_header = 0; break; case (int)'p': - print_fields_parsable_print = 1; + print_fields_parsable_print = + PRINT_FIELDS_PARSABLE_ENDING; + break; + case (int)'P': + print_fields_parsable_print = + PRINT_FIELDS_PARSABLE_NO_ENDING; break; case (int)'q': quiet_flag = 1; break; + case (int)'r': + readonly_flag = 1; + break; case (int)'s': with_assoc_flag = 1; break; @@ -325,8 +323,6 @@ _process_command (int argc, char *argv[]) exit_code = 1; if (quiet_flag == -1) fprintf(stderr, "no input"); - } else if (strncasecmp (argv[0], "all", 3) == 0) { - all_flag = 1; } else if (strncasecmp (argv[0], "associations", 3) == 0) { with_assoc_flag = 1; } else if (strncasecmp (argv[0], "dump", 3) == 0) { @@ -346,8 +342,6 @@ _process_command (int argc, char *argv[]) argv[0]); } _usage (); - } else if (strncasecmp (argv[0], "hide", 2) == 0) { - all_flag = 0; } else if (strncasecmp (argv[0], "load", 2) == 0) { if (argc < 2) { exit_code = 1; @@ -427,6 +421,14 @@ _process_command (int argc, char *argv[]) argv[0]); } quiet_flag = -1; + } else if (strncasecmp (argv[0], "readonly", 4) == 0) { + if (argc > 1) { + exit_code = 1; + fprintf (stderr, + "too many arguments for %s keyword\n", + argv[0]); + } + readonly_flag = 1; } else if (strncasecmp (argv[0], "rollup", 2) == 0) { time_t my_time = 0; if (argc > 2) { @@ -472,6 +474,12 @@ static void _add_it (int argc, char *argv[]) { int error_code = SLURM_SUCCESS; + if(readonly_flag) { + exit_code = 1; + fprintf(stderr, "Can't 
run this command in readonly mode.\n"); + return; + } + /* First identify the entity to add */ if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_add_account((argc - 1), &argv[1]); @@ -543,6 +551,13 @@ static void _modify_it (int argc, char *argv[]) { int error_code = SLURM_SUCCESS; + + if(readonly_flag) { + exit_code = 1; + fprintf(stderr, "Can't run this command in readonly mode.\n"); + return; + } + /* First identify the entity to modify */ if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_modify_account((argc - 1), &argv[1]); @@ -572,6 +587,12 @@ static void _delete_it (int argc, char *argv[]) { int error_code = SLURM_SUCCESS; + if(readonly_flag) { + exit_code = 1; + fprintf(stderr, "Can't run this command in readonly mode.\n"); + return; + } + /* First identify the entity to delete */ if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_delete_account((argc - 1), &argv[1]); @@ -601,14 +622,14 @@ void _usage () { printf ("\ sacctmgr [<OPTION>] [<COMMAND>] \n\ Valid <OPTION> values are: \n\ - -a or --all: equivalent to \"all\" command \n\ -h or --help: equivalent to \"help\" command \n\ - --hide: equivalent to \"hide\" command \n\ -i or --immediate: commit changes immediately \n\ -n or --no_header: no header will be added to the beginning of output \n\ -o or --oneliner: equivalent to \"oneliner\" command \n\ - -p or --parsable: output will be '|' delimited \n\ + -p or --parsable: output will be '|' delimited with a '|' at the end \n\ + -P or --parsable2: output will be '|' delimited without a '|' at the end\n\ -q or --quiet: equivalent to \"quiet\" command \n\ + -r or --readonly: equivalent to \"readonly\" command \n\ -s or --associations: equivalent to \"associations\" command \n\ -v or --verbose: equivalent to \"verbose\" command \n\ -V or --version: equivalent to \"version\" command \n\ @@ -618,23 +639,21 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ terminated. 
\n\ \n\ Valid <COMMAND> values are: \n\ - all display information about all entities, \n\ - including hidden/deleted ones. \n\ add <ENTITY> <SPECS> add entity \n\ associations when using show/list will list the \n\ associations associated with the entity. \n\ delete <ENTITY> <SPECS> delete the specified entity(s) \n\ exit terminate sacctmgr \n\ help print this description of use. \n\ - hide do not display information about \n\ - hidden/deleted entities. \n\ list <ENTITY> [<SPECS>] display info of identified entity, default \n\ is display all. \n\ modify <ENTITY> <SPECS> modify entity \n\ oneliner report output one record per line. \n\ + parsable output will be | delimited with an ending '|'\n\ + parsable2 output will be | delimited without an ending '|'\n\ + readonly makes it so no modification can happen. \n\ quiet print no messages other than error messages. \n\ quit terminate this command. \n\ - parsable output will be | delimited \n\ show same as list \n\ verbose enable detailed logging. \n\ version display tool version number. \n\ @@ -645,7 +664,8 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ \n\ <SPECS> are different for each command entity pair. 
\n\ list account - Clusters=, Descriptions=, Format=, Names=, \n\ - Organizations=, Parents=, and WithAssocs \n\ + Organizations=, Parents=, WithCoor=, \n\ + and WithAssocs \n\ add account - Clusters=, Description=, Fairshare=, \n\ MaxCPUSecs=, MaxJobs=, MaxNodes=, MaxWall=, \n\ Names=, Organization=, Parent=, and QosLevel \n\ @@ -658,7 +678,8 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ Organizations=, and Parents= \n\ \n\ list associations - Accounts=, Clusters=, Format=, ID=, \n\ - Partitions=, Parent=, Tree, Users= \n\ + Partitions=, Parent=, Tree, Users=, \n\ + WithDeleted, WOPInfo, WOPLimits \n\ \n\ list cluster - Names= Format= \n\ add cluster - Fairshare=, MaxCPUSecs=, \n\ @@ -679,7 +700,7 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ Format=, ID=, and Start= \n\ \n\ list user - AdminLevel=, DefaultAccounts=, Format=, Names=,\n\ - QosLevel=, and WithAssocs \n\ + QosLevel=, WithCoor=, and WithAssocs \n\ add user - Accounts=, AdminLevel=, Clusters=, \n\ DefaultAccount=, Fairshare=, MaxCPUSecs=, \n\ MaxJobs=, MaxNodes=, MaxWall=, Names=, \n\ diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h index b935efa51..4fc865fd4 100644 --- a/src/sacctmgr/sacctmgr.h +++ b/src/sacctmgr/sacctmgr.h @@ -101,6 +101,7 @@ extern int one_liner; /* one record per line if =1 */ extern int quiet_flag; /* quiet=1, verbose=-1, normal=0 */ extern int rollback_flag;/* immediate execute=0, else = 1 */ extern int with_assoc_flag;/* show acct/user associations flag */ +extern int readonly_flag; /* make it so you can only run list commands */ extern void *db_conn; extern uint32_t my_uid; @@ -146,9 +147,10 @@ extern int commit_check(char *warning); extern int get_uint(char *in_value, uint32_t *out_value, char *type); extern int addto_qos_char_list(List char_list, List qos_list, char *names, int option); -extern void sacctmgr_print_coord_list(print_field_t *field, List value); +extern void sacctmgr_print_coord_list( + print_field_t *field, List value, int last); extern void 
sacctmgr_print_qos_list(print_field_t *field, List qos_list, - List value); + List value, int last); extern char *get_qos_complete_str(List qos_list, List num_qos_list); extern int sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b); diff --git a/src/sacctmgr/txn_functions.c b/src/sacctmgr/txn_functions.c index b98286b01..17ea28bd8 100644 --- a/src/sacctmgr/txn_functions.c +++ b/src/sacctmgr/txn_functions.c @@ -204,7 +204,8 @@ extern int sacctmgr_list_txn(int argc, char *argv[]) case PRINT_ACTION: field->print_routine( field, - slurmdbd_msg_type_2_str(txn->action)); + slurmdbd_msg_type_2_str(txn->action, + 0)); break; case PRINT_ACTOR: field->print_routine(field, diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index acaf69df7..1b5c9fb19 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -61,6 +61,12 @@ static int _set_cond(int *start, int argc, char *argv[], user_cond->assoc_cond->max_jobs = NO_VAL; user_cond->assoc_cond->max_nodes_per_job = NO_VAL; user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; + /* we need this to make sure we only change users, not + * accounts if this list didn't exist it would change + * accounts. 
+ */ + user_cond->assoc_cond->user_list = + list_create(slurm_destroy_char); } for (i=(*start); i<argc; i++) { @@ -77,13 +83,9 @@ static int _set_cond(int *start, int argc, char *argv[], } else if(!end || !strncasecmp (argv[i], "Names", 1) || !strncasecmp (argv[i], "Users", 1)) { - if(!user_cond->assoc_cond->user_list) { - user_cond->assoc_cond->user_list = - list_create(slurm_destroy_char); - } if(slurm_addto_char_list( user_cond->assoc_cond->user_list, - argv[i]+end)) + argv[i]+end)) u_set = 1; } else if (!strncasecmp (argv[i], "Account", 2)) { if(!user_cond->assoc_cond->acct_list) { @@ -155,7 +157,9 @@ static int _set_cond(int *start, int argc, char *argv[], (*start) = i; - if(a_set) { + if(u_set && a_set) + return 3; + else if(a_set) { return 2; } else if(u_set) return 1; @@ -619,9 +623,10 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(sacctmgr_find_association_from_list( local_assoc_list, - name, account, cluster, NULL)) - continue; - + name, account, cluster, NULL)) { + continue; + } + assoc = xmalloc(sizeof(acct_association_rec_t)); assoc->user = xstrdup(name); assoc->acct = xstrdup(account); @@ -846,7 +851,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) int rc = SLURM_SUCCESS; acct_user_cond_t *user_cond = xmalloc(sizeof(acct_user_cond_t)); List user_list; - int i=0; + int i=0, set=0; ListIterator itr = NULL; ListIterator itr2 = NULL; acct_user_rec_t *user = NULL; @@ -855,6 +860,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) List qos_list = NULL; print_field_t *field = NULL; + int field_count = 0; List format_list = list_create(slurm_destroy_char); List print_fields_list; /* types are of print_field_t */ @@ -881,7 +887,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) user_cond->with_assocs = with_assoc_flag; - _set_cond(&i, argc, argv, user_cond, format_list); + set = _set_cond(&i, argc, argv, user_cond, format_list); if(exit_code) { destroy_acct_user_cond(user_cond); @@ -898,6 +904,17 @@ extern int 
sacctmgr_list_user(int argc, char *argv[]) slurm_addto_char_list(format_list, "Coord"); } + if(!user_cond->with_assocs && set > 1) { + if(!commit_check("You requested options that are only vaild " + "when querying with the withassoc option.\n" + "Are you sure you want to continue?")) { + printf("Aborted\n"); + list_destroy(format_list); + destroy_acct_user_cond(user_cond); + return SLURM_SUCCESS; + } + } + print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -978,7 +995,8 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->name = xstrdup("Partition"); field->len = 10; field->print_routine = print_fields_str; - } else if(!strncasecmp("User", object, 1)) { + } else if(!strncasecmp("User", object, 1) + || !strncasecmp("Name", object, 2)) { field->type = PRINT_USER; field->name = xstrdup("User"); field->len = 10; @@ -1014,73 +1032,100 @@ extern int sacctmgr_list_user(int argc, char *argv[]) itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); + field_count = list_count(print_fields_list); + while((user = list_next(itr))) { - if(user->assoc_list && list_count(user->assoc_list)) { + if(user->assoc_list) { ListIterator itr3 = list_iterator_create(user->assoc_list); while((assoc = list_next(itr3))) { + int curr_inx = 1; while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: field->print_routine( field, - assoc->acct); + assoc->acct, + (curr_inx == + field_count)); break; case PRINT_ADMIN: field->print_routine( field, acct_admin_level_str( user-> - admin_level)); + admin_level), + (curr_inx == + field_count)); break; case PRINT_CLUSTER: field->print_routine( field, - assoc->cluster); + assoc->cluster, + (curr_inx == + field_count)); break; case PRINT_COORDS: field->print_routine( field, - user->coord_accts); + user->coord_accts, + (curr_inx == + field_count)); break; case PRINT_DACCT: field->print_routine( field, - user->default_acct); + user->default_acct, + 
(curr_inx == + field_count), + (curr_inx == + field_count)); break; case PRINT_FAIRSHARE: field->print_routine( field, - assoc->fairshare); + assoc->fairshare, + (curr_inx == + field_count)); break; case PRINT_ID: field->print_routine( field, - assoc->id); + assoc->id, + (curr_inx == + field_count)); break; case PRINT_MAXC: field->print_routine( field, assoc-> - max_cpu_secs_per_job); + max_cpu_secs_per_job, + (curr_inx == + field_count)); break; case PRINT_MAXJ: field->print_routine( field, - assoc->max_jobs); + assoc->max_jobs, + (curr_inx == + field_count)); break; case PRINT_MAXN: field->print_routine( field, assoc-> - max_nodes_per_job); + max_nodes_per_job, + (curr_inx == + field_count)); break; case PRINT_MAXW: field->print_routine( field, assoc-> - max_wall_duration_per_job); + max_wall_duration_per_job, + (curr_inx == + field_count)); break; case PRINT_QOS: if(!qos_list) { @@ -1092,7 +1137,9 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->print_routine( field, qos_list, - user->qos_list); + user->qos_list, + (curr_inx == + field_count)); break; case PRINT_QOS_RAW: if(!qos_list) { @@ -1104,94 +1151,117 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->print_routine( field, qos_list, - user->qos_list); + user->qos_list, + (curr_inx == + field_count)); break; case PRINT_PID: field->print_routine( field, - assoc->parent_id); + assoc->parent_id, + (curr_inx == + field_count)); break; case PRINT_PNAME: field->print_routine( field, - assoc->parent_acct); + assoc->parent_acct, + (curr_inx == + field_count)); break; case PRINT_PART: field->print_routine( field, - assoc->partition); + assoc->partition, + (curr_inx == + field_count)); break; case PRINT_USER: field->print_routine( field, - user->name); + user->name, + (curr_inx == + field_count)); break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); } list_iterator_destroy(itr3); } else { + int curr_inx = 1; while((field = list_next(itr2))) { 
switch(field->type) { case PRINT_ACCOUNT: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_ADMIN: field->print_routine( field, acct_admin_level_str( - user->admin_level)); + user->admin_level), + (curr_inx == field_count)); break; case PRINT_CLUSTER: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_COORDS: field->print_routine( field, - user->coord_accts); + user->coord_accts, + (curr_inx == field_count)); break; case PRINT_DACCT: field->print_routine( field, - user->default_acct); + user->default_acct, + (curr_inx == field_count)); break; case PRINT_FAIRSHARE: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_ID: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_MAXC: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_MAXJ: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_MAXN: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_MAXW: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_QOS: if(!qos_list) { @@ -1202,7 +1272,8 @@ extern int sacctmgr_list_user(int argc, char *argv[]) } field->print_routine( field, qos_list, - user->qos_list); + user->qos_list, + (curr_inx == field_count)); break; case PRINT_QOS_RAW: if(!qos_list) { @@ -1213,26 +1284,31 @@ extern int sacctmgr_list_user(int argc, char *argv[]) } field->print_routine( field, qos_list, - user->qos_list); + user->qos_list, + (curr_inx == field_count)); break; case PRINT_PID: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_PART: field->print_routine( field, - NULL); + NULL, + (curr_inx == field_count)); break; case PRINT_USER: field->print_routine( field, - user->name); + user->name, + (curr_inx == field_count)); 
break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); @@ -1298,30 +1374,32 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) return SLURM_SUCCESS; } } - + notice_thread_init(); if(rec_set == 3 || rec_set == 1) { // process the account changes if(cond_set == 2) { rc = SLURM_ERROR; - if(list_count(user_cond->assoc_cond->acct_list)) { - notice_thread_fini(); - if(!commit_check( - " You specified Accounts if your " - "request. Did you mean " - "DefaultAccounts?\n")) { - goto assoc_start; - } - notice_thread_init(); + exit_code=1; + fprintf(stderr, + " There was a problem with your " + "'where' options.\n"); + goto assoc_start; + } + + if(user_cond->assoc_cond + && user_cond->assoc_cond->acct_list + && list_count(user_cond->assoc_cond->acct_list)) { + notice_thread_fini(); + if(commit_check( + " You specified Accounts in your " + "request. Did you mean " + "DefaultAccounts?\n")) { list_transfer(user_cond->def_acct_list, user_cond->assoc_cond->acct_list); - } else { - exit_code=1; - fprintf(stderr, - " There was a problem with your " - "'where' options.\n"); - goto assoc_start; } + notice_thread_init(); } + ret_list = acct_storage_g_modify_users( db_conn, my_uid, user_cond, user); if(ret_list && list_count(ret_list)) { @@ -1347,6 +1425,16 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) assoc_start: if(rec_set == 3 || rec_set == 2) { // process the association changes + if(cond_set == 1 + && !list_count(user_cond->assoc_cond->user_list)) { + rc = SLURM_ERROR; + exit_code=1; + fprintf(stderr, + " There was a problem with your " + "'where' options.\n"); + goto assoc_end; + } + ret_list = acct_storage_g_modify_associations( db_conn, my_uid, user_cond->assoc_cond, assoc); @@ -1370,6 +1458,7 @@ assoc_start: if(ret_list) list_destroy(ret_list); } +assoc_end: notice_thread_fini(); if(set) { @@ -1413,7 +1502,7 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) if(set == 1) { ret_list = acct_storage_g_remove_users( db_conn, 
my_uid, user_cond); - } else if(set == 2) { + } else if(set == 2 || set == 3) { ret_list = acct_storage_g_remove_associations( db_conn, my_uid, user_cond->assoc_cond); } @@ -1426,7 +1515,7 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) ListIterator itr = list_iterator_create(ret_list); if(set == 1) { printf(" Deleting users...\n"); - } else if(set == 2) { + } else if(set == 2 || set == 3) { printf(" Deleting user associations...\n"); } while((object = list_next(itr))) { @@ -1481,20 +1570,24 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) destroy_acct_user_cond(user_cond); return SLURM_ERROR; } - - itr = list_iterator_create(user_cond->assoc_cond->user_list); - while((name = list_next(itr))) { - xstrfmtcat(user_str, " %s\n", name); - + if(user_cond->assoc_cond->user_list) { + itr = list_iterator_create(user_cond->assoc_cond->user_list); + while((name = list_next(itr))) { + xstrfmtcat(user_str, " %s\n", name); + + } + list_iterator_destroy(itr); } - list_iterator_destroy(itr); - - itr = list_iterator_create(user_cond->assoc_cond->acct_list); - while((name = list_next(itr))) { - xstrfmtcat(acct_str, " %s\n", name); + if(user_cond->assoc_cond->acct_list) { + itr = list_iterator_create(user_cond->assoc_cond->acct_list); + while((name = list_next(itr))) { + xstrfmtcat(acct_str, " %s\n", name); + + } + list_iterator_destroy(itr); } - list_iterator_destroy(itr); + if(!user_str && !acct_str) { exit_code=1; fprintf(stderr, " You need to specify a user list " diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 79c5616b3..de913220f 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -61,6 +61,7 @@ pthread_mutex_t allocation_state_lock = PTHREAD_MUTEX_INITIALIZER; static bool exit_flag = false; static bool allocation_interrupted = false; static uint32_t pending_job_id = 0; +static time_t last_timeout = 0; static int fill_job_desc_from_opts(job_desc_msg_t *desc); static void ring_terminal_bell(void); @@ -462,8 +463,14 @@ static void 
_job_complete_handler(srun_job_complete_msg_t *comp) * no need to print this message. We probably * relinquished the allocation ourself. */ - info("Job allocation %u has been revoked.", - comp->job_id); + if (last_timeout && (last_timeout < time(NULL))) { + info("Job %u has exceeded its time limit and " + "its allocation has been revoked.", + comp->job_id); + } else { + info("Job allocation %u has been revoked.", + comp->job_id); + } } if (allocation_state == GRANTED && command_pid > -1 @@ -491,8 +498,6 @@ static void _job_complete_handler(srun_job_complete_msg_t *comp) */ static void _timeout_handler(srun_timeout_msg_t *msg) { - static time_t last_timeout = 0; - if (msg->timeout != last_timeout) { last_timeout = msg->timeout; verbose("Job allocation time limit to be reached at %s", diff --git a/src/sattach/sattach.c b/src/sattach/sattach.c index 988d59704..ddc5e4452 100644 --- a/src/sattach/sattach.c +++ b/src/sattach/sattach.c @@ -326,7 +326,8 @@ static int _attach_to_tasks(uint32_t jobid, msg.msg_type = REQUEST_REATTACH_TASKS; msg.data = &reattach_msg; - nodes_resp = slurm_send_recv_msgs(layout->node_list, &msg, timeout); + nodes_resp = slurm_send_recv_msgs(layout->node_list, &msg, + timeout, false); if (nodes_resp == NULL) { error("slurm_send_recv_msgs failed: %m"); return SLURM_ERROR; @@ -529,6 +530,12 @@ _exit_handler(message_thread_state_t *mts, slurm_msg_t *exit_msg) int i; int rc; + if ((msg->job_id != opt.jobid) || (msg->step_id != opt.stepid)) { + debug("Received MESSAGE_TASK_EXIT from wrong job: %u.%u", + msg->job_id, msg->step_id); + return; + } + pthread_mutex_lock(&mts->lock); for (i = 0; i < msg->num_tasks; i++) { diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 263b70c20..946c75802 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -61,7 +61,6 @@ #include "src/common/list.h" #include "src/common/log.h" #include "src/common/parse_time.h" -#include "src/common/plugstack.h" #include "src/common/proc_args.h" #include 
"src/common/slurm_protocol_api.h" #include "src/common/uid.h" @@ -583,12 +582,6 @@ char *process_options_first_pass(int argc, char **argv) { int opt_char, option_index = 0; char *str = NULL; - struct option *optz = spank_option_table_create (long_options); - - if (!optz) { - error ("Unable to create option table"); - exit (1); - } /* initialize option defaults */ _opt_default(); @@ -597,7 +590,7 @@ char *process_options_first_pass(int argc, char **argv) optind = 0; while((opt_char = getopt_long(argc, argv, opt_string, - optz, &option_index)) != -1) { + long_options, &option_index)) != -1) { switch (opt_char) { case '?': fprintf(stderr, "Try \"sbatch --help\" for more " @@ -916,16 +909,10 @@ static void _set_options(int argc, char **argv) { int opt_char, option_index = 0; char *tmp; - struct option *optz = spank_option_table_create (long_options); - - if (!optz) { - error ("Unable to create option table"); - exit (1); - } optind = 0; while((opt_char = getopt_long(argc, argv, opt_string, - optz, &option_index)) != -1) { + long_options, &option_index)) != -1) { switch (opt_char) { case '?': fatal("Try \"sbatch --help\" for more information"); @@ -1335,15 +1322,14 @@ static void _set_options(int argc, char **argv) setenv("SLURM_NETWORK", opt.network, 1); break; default: - if (spank_process_option (opt_char, optarg) < 0) - exit (1); + fatal("Unrecognized command line parameter %c", + opt_char); } } if (optind < argc) { fatal("Invalid argument: %s", argv[optind]); } - spank_option_table_destroy (optz); } static void _proc_get_user_env(char *optarg) @@ -2237,9 +2223,6 @@ static void _help(void) } slurm_conf_unlock(); - printf("\n"); - spank_print_options (stdout, 6, 30); - printf("\n" #ifdef HAVE_AIX /* AIX/Federation specific options */ "AIX related options:\n" diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 5cda2761f..ceaf73c7f 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -1,7 +1,7 @@ 
/*****************************************************************************\ * sbatch.c - Submit a SLURM batch script. * - * $Id: sbatch.c 14469 2008-07-09 18:15:23Z jette $ + * $Id: sbatch.c 14812 2008-08-19 00:10:09Z jette $ ***************************************************************************** * Copyright (C) 2006-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -42,7 +42,6 @@ #include <slurm/slurm.h> #include "src/common/env.h" -#include "src/common/plugstack.h" #include "src/common/read_config.h" #include "src/common/slurm_rlimits_info.h" #include "src/common/xstring.h" @@ -52,8 +51,6 @@ #define MAX_RETRIES 3 -static void _call_spank_local_user(job_desc_msg_t desc, - submit_response_msg_t *resp); static int fill_job_desc_from_opts(job_desc_msg_t *desc); static void *get_script_buffer(const char *filename, int *size); static void set_prio_process_env(void); @@ -72,9 +69,6 @@ int main(int argc, char *argv[]) int retries = 0; log_init(xbasename(argv[0]), logopt, 0, NULL); - if (spank_init(NULL) < 0) - fatal("Plug-in initialization failed"); - script_name = process_options_first_pass(argc, argv); /* reinit log with new verbosity (if changed by command line) */ if (opt.verbose || opt.quiet) { @@ -122,31 +116,12 @@ int main(int argc, char *argv[]) error(msg); sleep (++retries); } - _call_spank_local_user(desc, resp); info("Submitted batch job %d", resp->job_id); xfree(desc.script); slurm_free_submit_response_response_msg(resp); - spank_fini(NULL); return 0; } -static void _call_spank_local_user(job_desc_msg_t desc, - submit_response_msg_t *resp) -{ - struct spank_launcher_job_info info[1]; - - info->uid = desc.user_id; - info->gid = desc.group_id; - info->jobid = resp->job_id; - info->stepid = SLURM_BATCH_SCRIPT; - info->step_layout = NULL; - info->argc = desc.argc; - info->argv = desc.argv; - - if (spank_local_user(info) < 0) - error("spank_local_user: %m"); -} - /* Returns 0 on success, -1 on 
failure */ static int fill_job_desc_from_opts(job_desc_msg_t *desc) { @@ -421,9 +396,6 @@ static void *get_script_buffer(const char *filename, int *size) if (script_size == 0) { error("Batch script is empty!"); goto fail; - } else if (script_size >= 0xffff) { - error("Job script exceeds size supported by slurm"); - goto fail; } else if (xstring_is_whitespace(buf)) { error("Batch script contains only whitespace!"); goto fail; diff --git a/src/sbcast/agent.c b/src/sbcast/agent.c index 7b69d71ef..1610132ed 100644 --- a/src/sbcast/agent.c +++ b/src/sbcast/agent.c @@ -86,7 +86,7 @@ static void *_agent_thread(void *args) ret_list = slurm_send_recv_msgs(thread_ptr->nodelist, &thread_ptr->msg, - params.timeout); + params.timeout, false); if (ret_list == NULL) { error("slurm_send_recv_msgs: %m"); exit(1); diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c index 10fb82484..688730bc3 100644 --- a/src/scancel/scancel.c +++ b/src/scancel/scancel.c @@ -262,7 +262,7 @@ _cancel_job_id (uint32_t job_id, uint16_t sig) } if (error_code) { error_code = slurm_get_errno(); - if ((opt.verbose >= 0) || + if ((opt.verbose > 0) || ((error_code != ESLURM_ALREADY_DONE) && (error_code != ESLURM_INVALID_JOB_ID))) error("Kill job error on job id %u: %s", @@ -302,7 +302,7 @@ _cancel_step_id (uint32_t job_id, uint32_t step_id, uint16_t sig) } if (error_code) { error_code = slurm_get_errno(); - if ((opt.verbose >= 0) || (error_code != ESLURM_ALREADY_DONE )) + if ((opt.verbose > 0) || (error_code != ESLURM_ALREADY_DONE )) error("Kill job error on job step id %u.%u: %s", job_id, step_id, slurm_strerror(slurm_get_errno())); diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 9106ba516..a91878257 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -446,7 +446,7 @@ _print_daemons (void) static int _process_command (int argc, char *argv[]) { - int error_code; + int error_code = 0; if (argc < 1) { exit_code = 1; @@ -764,17 +764,31 @@ _process_command (int argc, 
char *argv[]) } else if (strncasecmp (argv[0], "shutdown", 8) == 0) { /* require full command name */ - if (argc > 2) { + uint16_t options = 0; + if (argc == 2) { + if (strcmp(argv[1], "slurmctld") && + strcmp(argv[1], "controller")) { + error_code = 1; + exit_code = 1; + fprintf (stderr, + "invalid shutdown argument:%s\n", + argv[1]); + } else + options= 2; + } else if (argc > 2) { + error_code = 1; exit_code = 1; fprintf (stderr, "too many arguments for keyword:%s\n", argv[0]); } - error_code = slurm_shutdown (0); - if (error_code) { - exit_code = 1; - if (quiet_flag != 1) - slurm_perror ("slurm_shutdown error"); + if (error_code == 0) { + error_code = slurm_shutdown(options); + if (error_code) { + exit_code = 1; + if (quiet_flag != 1) + slurm_perror ("slurm_shutdown error"); + } } } else if (strncasecmp (argv[0], "update", 1) == 0) { @@ -1069,7 +1083,7 @@ scontrol [<OPTION>] [<COMMAND>] \n\ setdebug <LEVEL> reset slurmctld debug level \n\ show <ENTITY> [<ID>] display state of identified entity, default \n\ is all records. \n\ - shutdown shutdown slurm controller. \n\ + shutdown <OPTS> shutdown slurm daemons \n\ suspend <job_id> susend specified job \n\ resume <job_id> resume previously suspended job \n\ setdebug <level> set slurmctld debug level \n\ @@ -1094,6 +1108,9 @@ scontrol [<OPTION>] [<COMMAND>] \n\ file or the name of the most detailed errors to report (e.g. \"info\",\n\ \"verbose\", \"debug\", \"debug2\", etc.). \n\ \n\ + <OPTS> may be \"slurmctld\" to shutdown just the slurmctld daemon, \n\ + otherwise all slurm daemons are shutdown \n\ + \n\ Node names may be specified using simple range expressions, \n\ (e.g. \"lx[10-20]\" corresponsds to lx10, lx11, lx12, ...) \n\ The job step id is the job id followed by a period and the step id. 
\n\ diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 89f073d9b..64d20e36e 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -229,7 +229,7 @@ void *agent(void *args) } slurm_mutex_unlock(&agent_cnt_mutex); if (slurmctld_config.shutdown_time) - return NULL; + goto cleanup; /* basic argument value tests */ begin_time = time(NULL); @@ -734,14 +734,15 @@ finished: ; #endif } -/* Report a communications error for specified node */ +/* Report a communications error for specified node + * This also gets logged as a non-responsive node */ static inline int _comm_err(char *node_name) { int rc = 1; #if AGENT_IS_THREAD if ((rc = is_node_resp (node_name))) #endif - error("agent/send_recv_msg: %s: %m", node_name); + verbose("agent/send_recv_msg: %s: %m", node_name); return rc; } @@ -841,8 +842,7 @@ static void *_thread_per_group_rpc(void *args) } else { if(!(ret_list = slurm_send_recv_msgs( thread_ptr->nodelist, - &msg, - 0))) { + &msg, 0, true))) { error("_thread_per_group_rpc: " "no ret_list given"); goto cleanup; @@ -1335,13 +1335,16 @@ static void _purge_agent_args(agent_arg_t *agent_arg_ptr) RESPONSE_RESOURCE_ALLOCATION) slurm_free_resource_allocation_response_msg( agent_arg_ptr->msg_args); - else if ((agent_arg_ptr->msg_type == REQUEST_TERMINATE_JOB) - || (agent_arg_ptr->msg_type == REQUEST_KILL_TIMELIMIT)) + else if ((agent_arg_ptr->msg_type == REQUEST_ABORT_JOB) || + (agent_arg_ptr->msg_type == REQUEST_TERMINATE_JOB) || + (agent_arg_ptr->msg_type == REQUEST_KILL_TIMELIMIT)) slurm_free_kill_job_msg(agent_arg_ptr->msg_args); else if (agent_arg_ptr->msg_type == SRUN_USER_MSG) slurm_free_srun_user_msg(agent_arg_ptr->msg_args); else if (agent_arg_ptr->msg_type == SRUN_EXEC) slurm_free_srun_exec_msg(agent_arg_ptr->msg_args); + else if (agent_arg_ptr->msg_type == SRUN_NODE_FAIL) + slurm_free_srun_node_fail_msg(agent_arg_ptr->msg_args); else xfree(agent_arg_ptr->msg_args); } @@ -1414,7 +1417,7 @@ extern void mail_job_info (struct job_record 
*job_ptr, uint16_t mail_type) mail_info_t *mi = _mail_alloc(); if (!job_ptr->mail_user) - mi->user_name = xstrdup(uid_to_string((uid_t)job_ptr->user_id)); + mi->user_name = uid_to_string((uid_t)job_ptr->user_id); else mi->user_name = xstrdup(job_ptr->mail_user); diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index b6903a886..3c1959ae9 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -143,6 +143,7 @@ int bg_recover = DEFAULT_RECOVER; char *slurmctld_cluster_name = NULL; /* name of cluster */ void *acct_db_conn = NULL; int accounting_enforce = 0; +bool ping_nodes_now = false; /* Local variables */ static int daemonize = DEFAULT_DAEMONIZE; @@ -262,8 +263,7 @@ int main(int argc, char *argv[]) /* * Create StateSaveLocation directory if necessary. */ - if (set_slurmctld_state_loc() < 0) - fatal("Unable to initialize StateSaveLocation"); + set_slurmctld_state_loc(); if (daemonize) { slurmctld_config.daemonize = 1; @@ -293,6 +293,11 @@ int main(int argc, char *argv[]) slurmctld_config.daemonize = 0; } + /* This must happen before we spawn any threads + * which are not designed to handle them */ + if (xsignal_block(controller_sigarray) < 0) + error("Unable to block signals"); + /* This needs to be copied for other modules to access the * memory, it will report 'HashBase' if it is not duped */ @@ -329,9 +334,6 @@ int main(int argc, char *argv[]) * SLURM_CRED_OPT_EXPIRY_WINDOW, CRED_LIFE); */ - if (xsignal_block(controller_sigarray) < 0) - error("Unable to block signals"); - /* * Initialize plugins. 
*/ @@ -419,10 +421,11 @@ int main(int argc, char *argv[]) &thread_attr, _slurmctld_rpc_mgr, NULL)) fatal("pthread_create error %m"); slurm_attr_destroy(&thread_attr); - clusteracct_storage_g_register_ctld( - slurmctld_conf.cluster_name, - slurmctld_conf.slurmctld_port); + clusteracct_storage_g_register_ctld( + slurmctld_conf.cluster_name, + slurmctld_conf.slurmctld_port); + /* * create attached thread for signal handling */ @@ -468,13 +471,13 @@ int main(int argc, char *argv[]) != SLURM_SUCCESS ) error("failed to save node selection state"); switch_save(slurmctld_conf.state_save_location); - if (slurmctld_config.resume_backup == false) - break; - recover = 2; /* Save any pending state save RPCs */ acct_storage_g_close_connection(&acct_db_conn); - assoc_mgr_fini(); + + if (slurmctld_config.resume_backup == false) + break; + recover = 2; } /* Since pidfile is created as user root (its owner is @@ -500,6 +503,12 @@ int main(int argc, char *argv[]) if (i >= 10) error("Left %d agent threads active", cnt); + /* Purge our local data structures */ + job_fini(); + part_fini(); /* part_fini() must preceed node_fini() */ + node_fini(); + trigger_fini(); + /* Plugins are needed to purge job/node data structures, * unplug after other data structures are purged */ g_slurm_jobcomp_fini(); @@ -512,12 +521,6 @@ int main(int argc, char *argv[]) switch_fini(); assoc_mgr_fini(); - /* Purge our local data structures */ - job_fini(); - part_fini(); /* part_fini() must preceed node_fini() */ - node_fini(); - trigger_fini(); - /* purge remaining data structures */ slurm_cred_ctx_destroy(slurmctld_config.cred_ctx); slurm_crypto_fini(); /* must be after ctx_destroy */ @@ -977,6 +980,8 @@ static void *_slurmctld_background(void *no_data) static time_t last_sched_time; static time_t last_checkpoint_time; static time_t last_group_time; + static time_t last_health_check_time; + static time_t last_no_resp_msg_time; static time_t last_ping_node_time; static time_t last_ping_srun_time; static 
time_t last_purge_job_time; @@ -985,7 +990,7 @@ static void *_slurmctld_background(void *no_data) static time_t last_trigger; static time_t last_node_acct; time_t now; - int ping_interval; + int no_resp_msg_interval, ping_interval; DEF_TIMERS; /* Locks: Read config */ @@ -1004,6 +1009,9 @@ static void *_slurmctld_background(void *no_data) /* Locks: Read node */ slurmctld_lock_t node_read_lock = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + /* Locks: Write node */ + slurmctld_lock_t node_write_lock2 = { + NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; /* Locks: Write partition */ slurmctld_lock_t part_write_lock = { NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK }; @@ -1011,15 +1019,19 @@ static void *_slurmctld_background(void *no_data) /* Let the dust settle before doing work */ now = time(NULL); last_sched_time = last_checkpoint_time = last_group_time = now; - last_purge_job_time = last_trigger = now; + last_purge_job_time = last_trigger = last_health_check_time = now; last_timelimit_time = last_assert_primary_time = now; + last_no_resp_msg_time = now; if (slurmctld_conf.slurmd_timeout) { - /* We ping nodes that haven't responded in SlurmdTimeout/2, + /* We ping nodes that haven't responded in SlurmdTimeout/3, * but need to do the test at a higher frequency or we might * DOWN nodes with times that fall in the gap. 
*/ ping_interval = slurmctld_conf.slurmd_timeout / 3; - } else - ping_interval = 60 * 60 * 24 * 356; /* one year */ + } else { + /* This will just ping non-responding nodes + * and restore them to service */ + ping_interval = 100; /* 100 seconds */ + } last_ping_node_time = now + (time_t)MIN_CHECKIN_TIME - ping_interval; last_ping_srun_time = now; last_node_acct = now; @@ -1034,6 +1046,13 @@ static void *_slurmctld_background(void *no_data) now = time(NULL); START_TIMER; + if (slurmctld_conf.slurmctld_debug <= 3) + no_resp_msg_interval = 300; + else if (slurmctld_conf.slurmctld_debug == 4) + no_resp_msg_interval = 60; + else + no_resp_msg_interval = 1; + if (slurmctld_config.shutdown_time) { int i; /* wait for RPC's to complete */ @@ -1053,6 +1072,14 @@ static void *_slurmctld_background(void *no_data) break; } + if (difftime(now, last_no_resp_msg_time) >= + no_resp_msg_interval) { + last_no_resp_msg_time = now; + lock_slurmctld(node_write_lock2); + node_no_resp_msg(); + unlock_slurmctld(node_write_lock2); + } + if (difftime(now, last_timelimit_time) >= PERIODIC_TIMEOUT) { last_timelimit_time = now; debug2("Performing job time limit and checkpoint test"); @@ -1062,11 +1089,23 @@ static void *_slurmctld_background(void *no_data) unlock_slurmctld(job_write_lock); } - if (difftime(now, last_ping_node_time) >= ping_interval) { + if (slurmctld_conf.health_check_interval && + (difftime(now, last_health_check_time) >= + slurmctld_conf.health_check_interval)) { + if (is_ping_done()) { + last_health_check_time = now; + lock_slurmctld(node_write_lock); + run_health_check(); + unlock_slurmctld(node_write_lock); + } + } + if ((difftime(now, last_ping_node_time) >= ping_interval) || + ping_nodes_now) { static bool msg_sent = false; if (is_ping_done()) { msg_sent = false; last_ping_node_time = now; + ping_nodes_now = false; lock_slurmctld(node_write_lock); ping_nodes(); unlock_slurmctld(node_write_lock); @@ -1458,25 +1497,26 @@ _init_pidfile(void) /* * set_slurmctld_state_loc - 
create state directory as needed and "cd" to it */ -extern int +extern void set_slurmctld_state_loc(void) { - char *tmp; - - if ((mkdir(slurmctld_conf.state_save_location, 0755) < 0) && - (errno != EEXIST)) { - fatal("mkdir(%s): %m", slurmctld_conf.state_save_location); - return SLURM_ERROR; - } + int rc; + struct stat st; + const char *path = slurmctld_conf.state_save_location; - tmp = xstrdup(slurmctld_conf.state_save_location); - xstrcat(tmp, "/slurm_mkdir_test"); - if ((mkdir(tmp, 0755) < 0) && (errno != EEXIST)) { - fatal("mkdir(%s): %m", tmp); - return SLURM_ERROR; + /* + * If state save location does not exist, try to create it. + * Otherwise, ensure path is a directory as expected, and that + * we have permission to write to it. + */ + if (((rc = stat(path, &st)) < 0) && (errno == ENOENT)) { + if (mkdir(path, 0755) < 0) + fatal("mkdir(%s): %m", path); } - (void) unlink(tmp); - xfree(tmp); - - return SLURM_SUCCESS; + else if (rc < 0) + fatal("Unable to stat state save loc: %s: %m", path); + else if (!S_ISDIR(st.st_mode)) + fatal("State save loc: %s: Not a directory!", path); + else if (access(path, R_OK|W_OK|X_OK) < 0) + fatal("Incorrect permissions on state save loc: %s", path); } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index b9ec0a969..f8c3c7d30 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1677,7 +1677,13 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, /* save user ID of the one who requested the job be cancelled */ if(signal == SIGKILL) job_ptr->requid = uid; - + if ((job_ptr->job_state == (JOB_PENDING | JOB_COMPLETING)) && + (signal == SIGKILL)) { + job_ptr->job_state = JOB_CANCELLED | JOB_COMPLETING; + verbose("job_signal of requeuing job %u successful", job_id); + return SLURM_SUCCESS; + } + if ((job_ptr->job_state == JOB_PENDING) && (signal == SIGKILL)) { last_job_update = now; @@ -2741,7 +2747,8 @@ _copy_job_desc_to_job_record(job_desc_msg_t * job_desc, job_ptr->comment 
= xstrdup(job_desc->comment); if (!wiki_sched_test) { char *sched_type = slurm_get_sched_type(); - if (strcmp(sched_type, "sched/wiki") == 0) + if ((strcmp(sched_type, "sched/wiki") == 0) || + (strcmp(sched_type, "sched/wiki2") == 0)) wiki_sched = true; xfree(sched_type); wiki_sched_test = true; @@ -2991,18 +2998,6 @@ static void _job_timed_out(struct job_record *job_ptr) static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, uid_t submit_uid) { - /* Permit normal user to specify job id only for sched/wiki - * (Maui scheduler). This was also required with earlier - * versions of the Moab scheduler (wiki2), but was fixed - * in early 2007 to submit jobs as user root */ - if (!wiki_sched_test) { - char *sched_type = slurm_get_sched_type(); - if (strcmp(sched_type, "sched/wiki") == 0) - wiki_sched = true; - xfree(sched_type); - wiki_sched_test = true; - } - if ((job_desc_msg->num_procs == NO_VAL) && (job_desc_msg->min_nodes == NO_VAL) && (job_desc_msg->req_nodes == NULL)) { @@ -3037,7 +3032,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, if (job_desc_msg->job_id != NO_VAL) { struct job_record *dup_job_ptr; - if ((submit_uid != 0) && (!wiki_sched) && + if ((submit_uid != 0) && (submit_uid != slurmctld_conf.slurm_user_id)) { info("attempt by uid %u to set job_id", submit_uid); return ESLURM_INVALID_JOB_ID; @@ -3243,7 +3238,7 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, while ((job_ptr = (struct job_record *) list_next(job_iterator))) { xassert (job_ptr->magic == JOB_MAGIC); - if (((show_flags & SHOW_ALL) == 0) && + if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && (job_ptr->part_ptr) && (job_ptr->part_ptr->hidden)) continue; @@ -3837,6 +3832,14 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) return ESLURM_USER_ID_MISSING; } + if (!wiki_sched_test) { + char *sched_type = slurm_get_sched_type(); + if ((strcmp(sched_type, "sched/wiki") == 0) || + (strcmp(sched_type, "sched/wiki2") == 0)) + 
wiki_sched = true; + xfree(sched_type); + wiki_sched_test = true; + } detail_ptr = job_ptr->details; if (detail_ptr) mc_ptr = detail_ptr->mc_ptr; @@ -4131,7 +4134,21 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } - if (job_specs->comment) { + if (job_specs->comment && wiki_sched && (!super_user)) { + /* User must use Moab command to change job comment */ + error("Attempt to change comment for job %u", + job_specs->job_id); + error_code = ESLURM_ACCESS_DENIED; +#if 0 + if (wiki_sched && strstr(job_ptr->comment, "QOS:")) { + if (strstr(job_ptr->comment, "FLAGS:PREEMPTOR")) + job_ptr->qos = QOS_EXPEDITE; + else if (strstr(job_ptr->comment, "FLAGS:PREEMPTEE")) + job_ptr->qos = QOS_STANDBY; + else + job_ptr->qos = QOS_NORMAL; +#endif + } else if (job_specs->comment) { xfree(job_ptr->comment); job_ptr->comment = job_specs->comment; job_specs->comment = NULL; /* Nothing left to free */ @@ -4546,7 +4563,7 @@ extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg) error("Orphan job %u.%u reported on node %s", reg_msg->job_id[i], reg_msg->step_id[i], reg_msg->node_name); - kill_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); + abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); } else if ((job_ptr->job_state == JOB_RUNNING) || @@ -4563,10 +4580,13 @@ extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg) job_ptr->time_last_active = now; } } else { + /* Typically indicates a job requeue and + * restart on another nodes. A node from the + * original allocation just responded here. 
*/ error("Registered job %u.%u on wrong node %s ", reg_msg->job_id[i], reg_msg->step_id[i], reg_msg->node_name); - kill_job_on_node(reg_msg->job_id[i], job_ptr, + abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); } } @@ -4579,25 +4599,19 @@ extern void validate_jobs_on_node(slurm_node_registration_status_msg_t *reg_msg) else if (job_ptr->job_state == JOB_PENDING) { + /* Typically indicates a job requeue and the hung + * slurmd that went DOWN is now responding */ error("Registered PENDING job %u.%u on node %s ", reg_msg->job_id[i], reg_msg->step_id[i], reg_msg->node_name); - job_ptr->job_state = JOB_FAILED; - job_ptr->exit_code = 1; - job_ptr->state_reason = FAIL_SYSTEM; - last_job_update = now; - job_ptr->start_time = job_ptr->end_time = now; - kill_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); - job_completion_logger(job_ptr); - delete_job_details(job_ptr); + abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); } else { /* else job is supposed to be done */ - error - ("Registered job %u.%u in state %s on node %s ", - reg_msg->job_id[i], reg_msg->step_id[i], - job_state_string(job_ptr->job_state), - reg_msg->node_name); + error("Registered job %u.%u in state %s on node %s ", + reg_msg->job_id[i], reg_msg->step_id[i], + job_state_string(job_ptr->job_state), + reg_msg->node_name); kill_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); } } @@ -4644,7 +4658,7 @@ static void _purge_lost_batch_jobs(int node_inx, time_t now) } /* - * kill_job_on_node - Kill the specific job_id on a specific node, + * abort_job_on_node - Kill the specific job_id on a specific node, * the request is not processed immediately, but queued. 
* This is to prevent a flood of pthreads if slurmctld restarts * without saved state and slurmd daemons register with a @@ -4656,6 +4670,40 @@ static void _purge_lost_batch_jobs(int node_inx, time_t now) * IN node_ptr - pointer to the node on which the job resides */ extern void +abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, + struct node_record *node_ptr) +{ + agent_arg_t *agent_info; + kill_job_msg_t *kill_req; + + debug("Aborting job %u on node %s", job_id, node_ptr->name); + + kill_req = xmalloc(sizeof(kill_job_msg_t)); + kill_req->job_id = job_id; + kill_req->time = time(NULL); + kill_req->nodes = xstrdup(node_ptr->name); + if (job_ptr) { /* NULL if unknown */ + kill_req->select_jobinfo = + select_g_copy_jobinfo(job_ptr->select_jobinfo); + } + + agent_info = xmalloc(sizeof(agent_arg_t)); + agent_info->node_count = 1; + agent_info->retry = 0; + agent_info->hostlist = hostlist_create(node_ptr->name); + agent_info->msg_type = REQUEST_ABORT_JOB; + agent_info->msg_args = kill_req; + + agent_queue_request(agent_info); +} + +/* + * kill_job_on_node - Kill the specific job_id on a specific node. + * IN job_id - id of the job to be killed + * IN job_ptr - pointer to terminating job (NULL if unknown, e.g. 
orphaned) + * IN node_ptr - pointer to the node on which the job resides + */ +extern void kill_job_on_node(uint32_t job_id, struct job_record *job_ptr, struct node_record *node_ptr) { diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 6f0936a9c..20863ba58 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -125,6 +125,9 @@ extern bool job_is_completing(void) struct job_record *job_ptr = NULL; time_t recent = time(NULL) - (slurmctld_conf.kill_wait + 2); + if (!job_list) + return completing; + job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { if ((job_ptr->job_state & JOB_COMPLETING) && diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 1ed44a1cd..e8b51b15b 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -4,7 +4,7 @@ * hash table (node_hash_table), time stamp (last_node_update) and * configuration list (config_list) * - * $Id: node_mgr.c 14293 2008-06-19 19:27:39Z jette $ + * $Id: node_mgr.c 14872 2008-08-25 16:25:28Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -102,7 +102,6 @@ static void _make_node_down(struct node_record *node_ptr, time_t event_time); static void _node_did_resp(struct node_record *node_ptr); static bool _node_is_hidden(struct node_record *node_ptr); -static void _node_not_resp (struct node_record *node_ptr, time_t msg_time); static void _pack_node (struct node_record *dump_node_ptr, bool cr_flag, Buf buffer); static void _sync_bitmaps(struct node_record *node_ptr, int job_count); @@ -110,6 +109,9 @@ static void _update_config_ptr(bitstr_t *bitmap, struct config_record *config_ptr); static int _update_node_features(char *node_names, char *features); static bool _valid_node_state_change(uint16_t old, uint16_t new); +#ifndef HAVE_FRONT_END +static void _node_not_resp (struct node_record *node_ptr, time_t msg_time); +#endif #if _DEBUG static void _dump_hash (void); #endif @@ -842,8 +844,8 @@ extern void pack_all_node (char **buffer_ptr, int *buffer_size, xassert (node_ptr->config_ptr->magic == CONFIG_MAGIC); - if (((show_flags & SHOW_ALL) == 0) - && (_node_is_hidden(node_ptr))) + if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && + (_node_is_hidden(node_ptr))) continue; if ((node_ptr->name == NULL) || (node_ptr->name[0] == '\0')) @@ -1067,9 +1069,13 @@ int update_node ( update_node_msg_t * update_node_msg ) node_ptr->node_state &= (~NODE_STATE_DRAIN); node_ptr->node_state &= (~NODE_STATE_FAIL); base_state &= NODE_STATE_BASE; - if (base_state == NODE_STATE_DOWN) + if (base_state == NODE_STATE_DOWN) { state_val = NODE_STATE_IDLE; - else + node_ptr->node_state |= + NODE_STATE_NO_RESPOND; + node_ptr->last_response = now; + ping_nodes_now = true; + } else state_val = base_state; } if (state_val == NODE_STATE_DOWN) { @@ -1668,7 +1674,7 @@ extern int validate_nodes_via_front_end( if (job_ptr == NULL) { error("Orphan job %u.%u reported", reg_msg->job_id[i], reg_msg->step_id[i]); - kill_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); + abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); } else if 
((job_ptr->job_state == JOB_RUNNING) || @@ -1689,17 +1695,11 @@ extern int validate_nodes_via_front_end( else if (job_ptr->job_state == JOB_PENDING) { + /* Typically indicates a job requeue and the hung + * slurmd that went DOWN is now responding */ error("Registered PENDING job %u.%u", reg_msg->job_id[i], reg_msg->step_id[i]); - /* FIXME: Could possibly recover the job */ - job_ptr->job_state = JOB_FAILED; - job_ptr->exit_code = 1; - job_ptr->state_reason = FAIL_SYSTEM; - last_job_update = now; - job_ptr->start_time = job_ptr->end_time = now; - kill_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); - job_completion_logger(job_ptr); - delete_job_details(job_ptr); + abort_job_on_node(reg_msg->job_id[i], job_ptr, node_ptr); } else { /* else job is supposed to be done */ @@ -1992,30 +1992,55 @@ void node_not_resp (char *name, time_t msg_time) struct node_record *node_ptr; #ifdef HAVE_FRONT_END /* Fake all other nodes */ int i; - char host_str[64]; - hostlist_t no_resp_hostlist = hostlist_create(""); for (i=0; i<node_record_count; i++) { - node_ptr = &node_record_table_ptr[i]; - (void) hostlist_push_host(no_resp_hostlist, node_ptr->name); - _node_not_resp(node_ptr, msg_time); + node_ptr = node_record_table_ptr + i; + node_ptr->not_responding = true; } - hostlist_uniq(no_resp_hostlist); - hostlist_ranged_string(no_resp_hostlist, sizeof(host_str), host_str); - error("Nodes %s not responding", host_str); - hostlist_destroy(no_resp_hostlist); #else node_ptr = find_node_record (name); if (node_ptr == NULL) { error ("node_not_resp unable to find node %s", name); return; } - if ((node_ptr->node_state & NODE_STATE_BASE) != NODE_STATE_DOWN) - error("Node %s not responding", node_ptr->name); + if ((node_ptr->node_state & NODE_STATE_BASE) != NODE_STATE_DOWN) { + /* Logged by node_no_resp_msg() on periodic basis */ + node_ptr->not_responding = true; + } _node_not_resp(node_ptr, msg_time); #endif } +/* For every node with the "not_responding" flag set, clear the flag + * and log 
that the node is not responding using a hostlist expression */ +extern void node_no_resp_msg(void) +{ + int i; + struct node_record *node_ptr; + char host_str[1024]; + hostlist_t no_resp_hostlist = NULL; + + for (i=0; i<node_record_count; i++) { + node_ptr = &node_record_table_ptr[i]; + if (!node_ptr->not_responding) + continue; + if (no_resp_hostlist) { + (void) hostlist_push_host(no_resp_hostlist, + node_ptr->name); + } else + no_resp_hostlist = hostlist_create(node_ptr->name); + node_ptr->not_responding = false; + } + if (no_resp_hostlist) { + hostlist_uniq(no_resp_hostlist); + hostlist_ranged_string(no_resp_hostlist, + sizeof(host_str), host_str); + error("Nodes %s not responding", host_str); + hostlist_destroy(no_resp_hostlist); + } +} + +#ifndef HAVE_FRONT_END static void _node_not_resp (struct node_record *node_ptr, time_t msg_time) { int i; @@ -2034,6 +2059,7 @@ static void _node_not_resp (struct node_record *node_ptr, time_t msg_time) node_ptr->node_state |= NODE_STATE_NO_RESPOND; return; } +#endif /* * set_node_down - make the specified node's state DOWN and @@ -2173,7 +2199,7 @@ void msg_to_slurmd (slurm_msg_type_t msg_type) kill_agent_args->hostlist = hostlist_create(""); if (msg_type == REQUEST_SHUTDOWN) { shutdown_req = xmalloc(sizeof(shutdown_msg_t)); - shutdown_req->core = 0; + shutdown_req->options = 0; kill_agent_args->msg_args = shutdown_req; } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index c88700050..e9120c332 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -210,6 +210,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, xfree(kill_job->nodes); select_g_free_jobinfo(&kill_job->select_jobinfo); xfree(kill_job); + hostlist_destroy(agent_args->hostlist); xfree(agent_args); return; } diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index a88b5998b..cd86e7d17 100644 --- a/src/slurmctld/partition_mgr.c +++ 
b/src/slurmctld/partition_mgr.c @@ -2,7 +2,7 @@ * partition_mgr.c - manage the partition information of slurm * Note: there is a global partition list (part_list) and * time stamp (last_part_update) - * $Id: partition_mgr.c 14068 2008-05-19 15:58:22Z jette $ + * $Id: partition_mgr.c 14795 2008-08-15 21:54:22Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -58,6 +58,7 @@ #include "src/common/list.h" #include "src/common/node_select.h" #include "src/common/pack.h" +#include "src/common/uid.h" #include "src/common/xstring.h" #include "src/slurmctld/locks.h" @@ -709,7 +710,7 @@ extern void pack_all_part(char **buffer_ptr, int *buffer_size, part_iterator = list_iterator_create(part_list); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { xassert (part_ptr->magic == PART_MAGIC); - if (((show_flags & SHOW_ALL) == 0) && + if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && ((part_ptr->hidden) || (validate_group (part_ptr, uid) == 0))) continue; pack_part(part_ptr, buffer); @@ -1018,6 +1019,7 @@ uid_t *_get_groups_members(char *group_names) if (group_names == NULL) return NULL; + tmp_names = xstrdup(group_names); one_group_name = strtok_r(tmp_names, ",", &name_ptr); while (one_group_name) { @@ -1051,50 +1053,61 @@ uid_t *_get_groups_members(char *group_names) */ uid_t *_get_group_members(char *group_name) { - struct group *group_struct_ptr; - struct passwd *user_pw_ptr; - int i, j; - uid_t *group_uids = NULL; - int uid_cnt = 0; + char grp_buffer[PW_BUF_SIZE]; + char pw_buffer[PW_BUF_SIZE]; + struct group grp, *grp_result = NULL; + struct passwd pw, *pwd_result = NULL; + uid_t *group_uids, my_uid; + gid_t my_gid; + int i, j, uid_cnt; +#ifdef HAVE_AIX + FILE *fp = NULL; +#endif - group_struct_ptr = getgrnam(group_name); /* Note: static memory, - * do not free */ - if 
(group_struct_ptr == NULL) { + /* We need to check for !grp_result, since it appears some + * versions of this function do not return an error on failure. + */ + if (getgrnam_r(group_name, &grp, grp_buffer, PW_BUF_SIZE, + &grp_result) || (grp_result == NULL)) { error("Could not find configured group %s", group_name); - setgrent(); return NULL; } - for (i = 0;; i++) { - if (group_struct_ptr->gr_mem[i] == NULL) + my_gid = grp_result->gr_gid; + + for (uid_cnt=0; ; uid_cnt++) { + if (grp_result->gr_mem[uid_cnt] == NULL) break; } - uid_cnt = i; group_uids = (uid_t *) xmalloc(sizeof(uid_t) * (uid_cnt + 1)); - memset(group_uids, 0, (sizeof(uid_t) * (uid_cnt + 1))); - + j = 0; - for (i = 0; i < uid_cnt; i++) { - user_pw_ptr = getpwnam(group_struct_ptr->gr_mem[i]); - if (user_pw_ptr) { - if (user_pw_ptr->pw_uid) - group_uids[j++] = user_pw_ptr->pw_uid; - } else + for (i=0; i<uid_cnt; i++) { + my_uid = uid_from_string(grp_result->gr_mem[i]); + if (my_uid == (uid_t) -1) { error("Could not find user %s in configured group %s", - group_struct_ptr->gr_mem[i], group_name); - setpwent(); + grp_result->gr_mem[i], group_name); + } else if (my_uid) { + group_uids[j++] = my_uid; + } } - - while((user_pw_ptr = getpwent())) { - if(user_pw_ptr->pw_gid != group_struct_ptr->gr_gid) + + setpwent(); +#ifdef HAVE_AIX + while (!getpwent_r(&pw, pw_buffer, PW_BUF_SIZE, &fp)) { + pwd_result = &pw; +#else + while (!getpwent_r(&pw, pw_buffer, PW_BUF_SIZE, &pwd_result)) { +#endif + if (pwd_result->pw_gid != my_gid) continue; j++; - xrealloc(group_uids, ((j+1) * sizeof(uid_t))); - group_uids[j-1] = user_pw_ptr->pw_uid; + xrealloc(group_uids, ((j+1) * sizeof(uid_t))); + group_uids[j-1] = pwd_result->pw_uid; } - setpwent(); - setgrent(); + endpwent(); + return group_uids; } diff --git a/src/slurmctld/ping_nodes.c b/src/slurmctld/ping_nodes.c index 85da41b8d..1317f9ae1 100644 --- a/src/slurmctld/ping_nodes.c +++ b/src/slurmctld/ping_nodes.c @@ -1,6 +1,5 @@ 
/*****************************************************************************\ * ping_nodes.c - ping the slurmd daemons to test if they respond - * Note: there is a global node table (node_record_table_ptr) ***************************************************************************** * Copyright (C) 2003-2006 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -66,9 +65,6 @@ static pthread_mutex_t lock_mutex = PTHREAD_MUTEX_INITIALIZER; static int ping_count = 0; -static void _run_health_check(void); - -/* struct timeval start_time, end_time; */ /* * is_ping_done - test if the last node ping cycle has completed. @@ -115,17 +111,6 @@ void ping_end (void) else fatal ("ping_count < 0"); slurm_mutex_unlock(&lock_mutex); - -#if 0 - gettimeofday(&end_time, NULL); - start = start_time.tv_sec; - start *= 1000000; - start += start_time.tv_usec; - end = end_time.tv_sec; - end *= 1000000; - end += end_time.tv_usec; - info("done with ping took %ld",(end-start)); -#endif } /* @@ -138,7 +123,6 @@ void ping_nodes (void) int i; time_t now, still_live_time, node_dead_time; static time_t last_ping_time = (time_t) 0; - static time_t last_health_check = (time_t) 0; uint16_t base_state, no_resp_flag; bool restart_flag; hostlist_t down_hostlist = NULL; @@ -147,13 +131,6 @@ void ping_nodes (void) agent_arg_t *reg_agent_args = NULL; now = time (NULL); - if (slurmctld_conf.health_check_interval && - (difftime(now, last_health_check) >= - slurmctld_conf.health_check_interval)) { - last_health_check = now; - _run_health_check(); - return; - } ping_agent_args = xmalloc (sizeof (agent_arg_t)); ping_agent_args->msg_type = REQUEST_PING; @@ -164,7 +141,6 @@ void ping_nodes (void) reg_agent_args->msg_type = REQUEST_NODE_REGISTRATION_STATUS; reg_agent_args->retry = 0; reg_agent_args->hostlist = hostlist_create(""); - /* gettimeofday(&start_time, NULL); */ /* * If there are a large number of down nodes, the node ping @@ -197,13 +173,14 @@ void 
ping_nodes (void) base_state = node_ptr->node_state & NODE_STATE_BASE; no_resp_flag = node_ptr->node_state & NODE_STATE_NO_RESPOND; - if ((slurmctld_conf.slurmd_timeout == 0) - && (base_state != NODE_STATE_UNKNOWN)) + if ((slurmctld_conf.slurmd_timeout == 0) && + (base_state != NODE_STATE_UNKNOWN) && + (no_resp_flag == 0)) continue; - if ((node_ptr->last_response != (time_t) 0) - && (node_ptr->last_response <= node_dead_time) - && (base_state != NODE_STATE_DOWN)) { + if ((node_ptr->last_response != (time_t) 0) && + (node_ptr->last_response <= node_dead_time) && + (base_state != NODE_STATE_DOWN)) { if (down_hostlist) (void) hostlist_push_host(down_hostlist, node_ptr->name); @@ -211,6 +188,7 @@ void ping_nodes (void) down_hostlist = hostlist_create(node_ptr->name); set_node_down(node_ptr->name, "Not responding"); + node_ptr->not_responding = false; /* logged below */ continue; } @@ -239,7 +217,8 @@ void ping_nodes (void) continue; } - if (node_ptr->last_response >= still_live_time) + if ((!no_resp_flag) && + (node_ptr->last_response >= still_live_time)) continue; /* Do not keep pinging down nodes since this can induce @@ -285,7 +264,8 @@ void ping_nodes (void) } } -static void _run_health_check(void) +/* Spawn health check function for every node that is not DOWN */ +extern void run_health_check(void) { int i; uint16_t base_state; diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 3451467bd..43e396a35 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -137,6 +137,14 @@ inline static void _slurm_rpc_accounting_update_msg(slurm_msg_t *msg); */ void slurmctld_req (slurm_msg_t * msg) { + /* Just to validate the cred */ + (void) g_slurm_auth_get_uid(msg->auth_cred, NULL); + if (g_slurm_auth_errno(msg->auth_cred) != SLURM_SUCCESS) { + error("Bad authentication: %s", + g_slurm_auth_errstr(g_slurm_auth_errno(msg->auth_cred))); + return; + } + switch (msg->msg_type) { case REQUEST_RESOURCE_ALLOCATION: 
_slurm_rpc_allocate_resources(msg); @@ -983,6 +991,7 @@ static void _slurm_rpc_job_step_kill(slurm_msg_t * msg) /* do RPC call */ if (job_step_kill_msg->job_step_id == SLURM_BATCH_SCRIPT) { + /* NOTE: SLURM_BATCH_SCRIPT == NO_VAL */ error_code = job_signal(job_step_kill_msg->job_id, job_step_kill_msg->signal, job_step_kill_msg->batch_flag, uid); @@ -1638,7 +1647,7 @@ static void _slurm_rpc_reconfigure_controller(slurm_msg_t * msg) static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) { int error_code = SLURM_SUCCESS, i; - uint16_t core_arg = 0; + uint16_t options = 0; shutdown_msg_t *shutdown_msg = (shutdown_msg_t *) msg->data; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); /* Locks: Read node */ @@ -1657,17 +1666,19 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) slurmctld_config.resume_backup = true; } else { info("Performing RPC: REQUEST_SHUTDOWN"); - core_arg = shutdown_msg->core; + options = shutdown_msg->options; } /* do RPC call */ - if (error_code); - else if (core_arg) + if (error_code) + ; + else if (options == 1) info("performing immeditate shutdown without state save"); else if (slurmctld_config.shutdown_time) debug2("shutdown RPC issued when already in progress"); else { - if (msg->msg_type == REQUEST_SHUTDOWN) { + if ((msg->msg_type == REQUEST_SHUTDOWN) && + (options == 0)) { /* This means (msg->msg_type != REQUEST_CONTROL) */ lock_slurmctld(node_read_lock); msg_to_slurmd(REQUEST_SHUTDOWN); @@ -1699,7 +1710,7 @@ static void _slurm_rpc_shutdown_controller(slurm_msg_t * msg) slurm_send_rc_msg(msg, error_code); - if ((error_code == SLURM_SUCCESS) && core_arg && + if ((error_code == SLURM_SUCCESS) && (options == 1) && (slurmctld_config.thread_id_sig)) pthread_kill(slurmctld_config.thread_id_sig, SIGABRT); } diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index cb1910654..8a31935ff 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -272,7 +272,7 @@ static int 
_init_all_slurm_conf(void) int error_code; char *conf_name = xstrdup(slurmctld_conf.slurm_conf); - slurm_conf_reinit_nolock(conf_name); + slurm_conf_reinit(conf_name); xfree(conf_name); if ((error_code = init_node_conf())) diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 8ef6ed0d5..20f05fba9 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -196,6 +196,8 @@ struct node_record { uint16_t node_state; /* enum node_states, ORed with * NODE_STATE_NO_RESPOND if not * responding */ + bool not_responding; /* set if fails to respond, + * clear after logging this */ time_t last_response; /* last response from the node */ time_t last_idle; /* time node last become idle */ uint16_t cpus; /* count of processors on the node */ @@ -235,6 +237,7 @@ extern uint32_t total_cpus; /* count of CPUs in the entire cluster */ extern bitstr_t *idle_node_bitmap; /* bitmap of idle nodes */ extern bitstr_t *share_node_bitmap; /* bitmap of sharable nodes */ extern bitstr_t *up_node_bitmap; /* bitmap of up nodes, not DOWN */ +extern bool ping_nodes_now; /* if set, ping nodes immediately */ /*****************************************************************************\ * PARTITION parameters and data structures @@ -516,6 +519,21 @@ enum select_data_info { * Global slurmctld functions \*****************************************************************************/ +/* + * abort_job_on_node - Kill the specific job_id on a specific node, + * the request is not processed immediately, but queued. + * This is to prevent a flood of pthreads if slurmctld restarts + * without saved state and slurmd daemons register with a + * multitude of running jobs. Slurmctld will not recognize + * these jobs and use this function to kill them - one + * agent request per node as they register. + * IN job_id - id of the job to be killed + * IN job_ptr - pointer to terminating job (NULL if unknown, e.g. 
orphaned) + * IN node_ptr - pointer to the node on which the job resides + */ +extern void abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, + struct node_record *node_ptr); + /* * bitmap2node_name - given a bitmap, build a list of comma separated node * names. names may include regular expressions (e.g. "lx[01-10]") @@ -993,20 +1011,14 @@ extern void job_time_limit (void); extern int kill_job_by_part_name(char *part_name); /* - * kill_job_on_node - Kill the specific job_id on a specific node, - * the request is not processed immediately, but queued. - * This is to prevent a flood of pthreads if slurmctld restarts - * without saved state and slurmd daemons register with a - * multitude of running jobs. Slurmctld will not recognize - * these jobs and use this function to kill them - one + * kill_job_on_node - Kill the specific job_id on a specific node. * agent request per node as they register. * IN job_id - id of the job to be killed * IN job_ptr - pointer to terminating job (NULL if unknown, e.g. 
orphaned) * IN node_ptr - pointer to the node on which the job resides */ -extern void kill_job_on_node(uint32_t job_id, - struct job_record *job_ptr, - struct node_record *node_ptr); +extern void kill_job_on_node(uint32_t job_id, struct job_record *job_ptr, + struct node_record *node_ptr); /* * kill_running_job_by_node_name - Given a node name, deallocate jobs @@ -1120,6 +1132,10 @@ extern void node_did_resp (char *name); */ extern void node_not_resp (char *name, time_t msg_time); +/* For every node with the "not_responding" flag set, clear the flag + * and log that the node is not responding using a hostlist expression */ +extern void node_no_resp_msg(void); + /* * job_alloc_info - get details about an existing job allocation * IN uid - job issuing the code @@ -1287,6 +1303,9 @@ extern void resume_job_step(struct job_record *job_ptr); * mode, assuming control when the primary controller stops responding */ extern void run_backup(void); +/* Spawn health check function for every node that is not DOWN */ +extern void run_health_check(void); + /* save_all_state - save entire slurmctld state for later recovery */ extern void save_all_state(void); @@ -1301,7 +1320,7 @@ extern void set_node_down (char *name, char *reason); /* * set_slurmctld_state_loc - create state directory as needed and "cd" to it */ -extern int set_slurmctld_state_loc(void); +extern void set_slurmctld_state_loc(void); /* set_slurmd_addr - establish the slurm_addr for the slurmd on each node * Uses common data structures. 
*/ diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 0fe8834bc..11fa939f3 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * step_mgr.c - manage the job step information of slurm - * $Id: step_mgr.c 14548 2008-07-17 22:00:36Z jette $ + * $Id: step_mgr.c 14621 2008-07-24 15:24:59Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1127,7 +1127,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, while ((job_ptr = (struct job_record *) list_next(job_iterator))) { - if (((show_flags & SHOW_ALL) == 0) && + if (((show_flags & SHOW_ALL) == 0) && (uid != 0) && (job_ptr->part_ptr) && (job_ptr->part_ptr->hidden)) continue; diff --git a/src/slurmctld/trigger_mgr.c b/src/slurmctld/trigger_mgr.c index 1e38a5986..00d112a67 100644 --- a/src/slurmctld/trigger_mgr.c +++ b/src/slurmctld/trigger_mgr.c @@ -950,7 +950,8 @@ static void _trigger_node_event(trig_mgr_info_t *trig_in, time_t now) * may be sufficient. 
*/ static void _trigger_run_program(trig_mgr_info_t *trig_in) { - char program[1024], arg0[1024], arg1[1024], user_name[1024], *pname; + char program[1024], arg0[1024], arg1[1024], user_name[1024]; + char *pname, *uname; uid_t uid; gid_t gid; pid_t child; @@ -967,7 +968,9 @@ static void _trigger_run_program(trig_mgr_info_t *trig_in) strncpy(arg1, trig_in->res_id, sizeof(arg1)); uid = trig_in->user_id; gid = trig_in->group_id; - snprintf(user_name, sizeof(user_name), "%s", uid_to_string(uid)); + uname = uid_to_string(uid); + snprintf(user_name, sizeof(user_name), "%s", uname); + xfree(uname); child = fork(); if (child > 0) { diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index d5314fced..f46df89eb 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -70,6 +70,7 @@ #include "src/common/xstring.h" #include "src/common/xmalloc.h" #include "src/common/list.h" +#include "src/common/uid.h" #include "src/common/util-net.h" #include "src/common/forward.h" #include "src/common/read_config.h" @@ -112,6 +113,7 @@ static bool _job_still_running(uint32_t job_id); static int _kill_all_active_steps(uint32_t jobid, int sig, bool batch); static int _terminate_all_steps(uint32_t jobid, bool batch); static void _rpc_launch_tasks(slurm_msg_t *); +static void _rpc_abort_job(slurm_msg_t *); static void _rpc_batch_job(slurm_msg_t *); static void _rpc_signal_tasks(slurm_msg_t *); static void _rpc_checkpoint_tasks(slurm_msg_t *); @@ -241,6 +243,12 @@ slurmd_req(slurm_msg_t *msg) last_slurmctld_msg = time(NULL); slurm_free_suspend_msg(msg->data); break; + case REQUEST_ABORT_JOB: + debug2("Processing RPC: REQUEST_ABORT_JOB"); + last_slurmctld_msg = time(NULL); + _rpc_abort_job(msg); + slurm_free_kill_job_msg(msg->data); + break; case REQUEST_TERMINATE_JOB: debug2("Processing RPC: REQUEST_TERMINATE_JOB"); last_slurmctld_msg = time(NULL); @@ -319,15 +327,14 @@ _send_slurmstepd_init(int fd, slurmd_step_type_t type, void *req, Buf buffer = NULL; slurm_msg_t msg; 
uid_t uid = (uid_t)-1; - struct passwd pwd, *pwd_ptr; - char *pwd_buf; - size_t buf_size; gids_t *gids = NULL; int rank; int parent_rank, children, depth, max_depth; char *parent_alias = NULL; slurm_addr parent_addr = {0}; + char pwd_buffer[PW_BUF_SIZE]; + struct passwd pwd, *pwd_result; slurm_msg_t_init(&msg); /* send type over to slurmstepd */ @@ -452,10 +459,8 @@ _send_slurmstepd_init(int fd, slurmd_step_type_t type, void *req, /* send cached group ids array for the relevant uid */ debug3("_send_slurmstepd_init: call to getpwuid_r"); - buf_size = sysconf(_SC_GETPW_R_SIZE_MAX); - pwd_buf = xmalloc(buf_size); - if (getpwuid_r(uid, &pwd, pwd_buf, buf_size, &pwd_ptr)) { - xfree(pwd_buf); + if (getpwuid_r(uid, &pwd, pwd_buffer, PW_BUF_SIZE, &pwd_result) || + (pwd_result == NULL)) { error("_send_slurmstepd_init getpwuid_r: %m"); len = 0; safe_write(fd, &len, sizeof(int)); @@ -463,7 +468,8 @@ _send_slurmstepd_init(int fd, slurmd_step_type_t type, void *req, } debug3("_send_slurmstepd_init: return from getpwuid_r"); - if ((gids = _gids_cache_lookup(pwd.pw_name, pwd.pw_gid))) { + if ((gids = _gids_cache_lookup(pwd_result->pw_name, + pwd_result->pw_gid))) { int i; uint32_t tmp32; safe_write(fd, &gids->ngids, sizeof(int)); @@ -475,7 +481,6 @@ _send_slurmstepd_init(int fd, slurmd_step_type_t type, void *req, len = 0; safe_write(fd, &len, sizeof(int)); } - xfree(pwd_buf); return 0; rwfail: @@ -776,6 +781,8 @@ _rpc_launch_tasks(slurm_msg_t *msg) slurm_get_ip_str(cli, &port, host, sizeof(host)); info("launch task %u.%u request from %u.%u@%s (port %hu)", req->job_id, req->job_step_id, req->uid, req->gid, host, port); + env_array_append(&req->env, "SLURM_SRUN_COMM_HOST", host); + req->envc = envcount(req->env); first_job_run = !slurm_cred_jobid_cached(conf->vctx, req->job_id); if (_check_job_credential(req, req_uid, nodeid, &step_hset) < 0) { @@ -894,10 +901,9 @@ _prolog_error(batch_job_launch_msg_t *req, int rc) static void _get_user_env(batch_job_launch_msg_t *req) { - struct 
passwd pwd, *pwd_ptr; - char *pwd_buf = NULL; + struct passwd pwd, *pwd_ptr = NULL; + char pwd_buf[PW_BUF_SIZE]; char **new_env; - size_t buf_size; int i; for (i=0; i<req->argc; i++) { @@ -907,9 +913,8 @@ _get_user_env(batch_job_launch_msg_t *req) if (i >= req->argc) return; /* don't need to load env */ - buf_size = sysconf(_SC_GETPW_R_SIZE_MAX); - pwd_buf = xmalloc(buf_size); - if (getpwuid_r(req->uid, &pwd, pwd_buf, buf_size, &pwd_ptr)) { + if (getpwuid_r(req->uid, &pwd, pwd_buf, PW_BUF_SIZE, &pwd_ptr) || + (pwd_ptr == NULL)) { error("getpwuid_r(%u):%m", req->uid); } else { verbose("get env for user %s here", pwd.pw_name); @@ -929,7 +934,6 @@ _get_user_env(batch_job_launch_msg_t *req) "running only with passed environment"); } } - xfree(pwd_buf); } /* The RPC currently contains a memory size limit, but we load the @@ -1737,7 +1741,7 @@ _rpc_timelimit(slurm_msg_t *msg) slurm_close_accepted_conn(msg->conn_fd); msg->conn_fd = -1; - _kill_all_active_steps(req->job_id, SIGXCPU, true); + _kill_all_active_steps(req->job_id, SIG_TIME_LIMIT, true); nsteps = xcpu_signal(SIGTERM, req->nodes) + _kill_all_active_steps(req->job_id, SIGTERM, false); verbose( "Job %u: timeout: sent SIGTERM to %d active steps", @@ -2550,6 +2554,76 @@ _rpc_suspend_job(slurm_msg_t *msg) } } +/* Job shouldn't even be runnin here, abort it immediately */ +static void +_rpc_abort_job(slurm_msg_t *msg) +{ + kill_job_msg_t *req = msg->data; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + char *bg_part_id = NULL; + + debug("_rpc_abort_job, uid = %d", uid); + /* + * check that requesting user ID is the SLURM UID + */ + if (!_slurm_authorized_user(uid)) { + error("Security violation: abort_job(%ld) from uid %ld", + req->job_id, (long) uid); + if (msg->conn_fd >= 0) + slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); + return; + } + + slurmd_release_resources(req->job_id); + + /* + * "revoke" all future credentials for this jobid + */ + if (slurm_cred_revoke(conf->vctx, req->job_id, req->time) < 
0) { + debug("revoking cred for job %u: %m", req->job_id); + } else { + save_cred_state(conf->vctx); + debug("credential for job %u revoked", req->job_id); + } + + /* + * At this point, if connection still open, we send controller + * a "success" reply to indicate that we've recvd the msg. + */ + if (msg->conn_fd >= 0) { + slurm_send_rc_msg(msg, SLURM_SUCCESS); + if (slurm_close_accepted_conn(msg->conn_fd) < 0) + error ("rpc_abort_job: close(%d): %m", msg->conn_fd); + msg->conn_fd = -1; + } + + if ((xcpu_signal(SIGKILL, req->nodes) + + _kill_all_active_steps(req->job_id, SIG_ABORT, true)) ) { + /* + * Block until all user processes are complete. + */ + _pause_for_job_completion (req->job_id, req->nodes, 0); + } + + /* + * Begin expiration period for cached information about job. + * If expiration period has already begun, then do not run + * the epilog again, as that script has already been executed + * for this job. + */ + if (slurm_cred_begin_expiration(conf->vctx, req->job_id) < 0) { + debug("Not running epilog for jobid %d: %m", req->job_id); + return; + } + + save_cred_state(conf->vctx); + + select_g_get_jobinfo(req->select_jobinfo, SELECT_DATA_BLOCK_ID, + &bg_part_id); + _run_epilog(req->job_id, req->job_uid, bg_part_id); + xfree(bg_part_id); +} + static void _rpc_terminate_job(slurm_msg_t *msg) { @@ -3097,10 +3171,14 @@ _getgroups(void) extern void init_gids_cache(int cache) { - struct passwd *pwd; + struct passwd pw, *pwd; int ngids; gid_t *orig_gids; gids_t *gids; + char buf[BUF_SIZE]; +#ifdef HAVE_AIX + FILE *fp = NULL; +#endif if (!cache) { _gids_cache_purge(); @@ -3114,7 +3192,14 @@ init_gids_cache(int cache) orig_gids = (gid_t *)xmalloc(ngids * sizeof(gid_t)); getgroups(ngids, orig_gids); - while ((pwd = getpwent())) { +#ifdef HAVE_AIX + setpwent(&fp); + while (!getpwent_r(&pw, buf, BUF_SIZE, &fp)) { + pwd = &pw; +#else + setpwent(); + while (!getpwent_r(&pw, buf, BUF_SIZE, &pwd)) { +#endif if (_gids_cache_lookup(pwd->pw_name, pwd->pw_gid)) continue; 
if (initgroups(pwd->pw_name, pwd->pw_gid)) { @@ -3128,7 +3213,11 @@ init_gids_cache(int cache) continue; _gids_cache_register(pwd->pw_name, pwd->pw_gid, gids); } +#ifdef HAVE_AIX + endpwent_r(&fp); +#else endpwent(); +#endif setgroups(ngids, orig_gids); xfree(orig_gids); diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 4b5ef4d48..a92c1ae0d 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 14504 2008-07-14 17:38:53Z jette $ + * $Id: mgr.c 14702 2008-08-05 22:18:13Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -242,7 +242,15 @@ _batch_finish(slurmd_job_t *job, int rc) if (job->batchdir && (rmdir(job->batchdir) < 0)) error("rmdir(%s): %m", job->batchdir); xfree(job->batchdir); - if ((job->stepid == NO_VAL) || (job->stepid == SLURM_BATCH_SCRIPT)) { + if (job->aborted) { + if ((job->stepid == NO_VAL) || + (job->stepid == SLURM_BATCH_SCRIPT)) { + info("step %u.%u abort completed", + job->jobid, job->stepid); + } else + info("job %u abort completed", job->jobid); + } else if ((job->stepid == NO_VAL) || + (job->stepid == SLURM_BATCH_SCRIPT)) { verbose("job %u completed with slurm_rc = %d, job_rc = %d", job->jobid, rc, step_complete.step_rc); _send_complete_batch_script_msg(job, rc, job->task[0]->estatus); @@ -309,7 +317,9 @@ cleanup1: error("batch script setup failed for job %u.%u", msg->job_id, msg->step_id); - if (msg->step_id == SLURM_BATCH_SCRIPT) { + if (job->aborted) + verbose("job %u abort complete", job->jobid); + else if (msg->step_id == SLURM_BATCH_SCRIPT) { _send_complete_batch_script_msg(job, ESLURMD_CREATE_BATCH_DIR_ERROR, -1); } else @@ -420,6 +430,8 @@ 
_send_exit_msg(slurmd_job_t *job, uint32_t *tid, int n, int status) msg.task_id_list = tid; msg.num_tasks = n; msg.return_code = status; + msg.job_id = job->jobid; + msg.step_id = job->stepid; slurm_msg_t_init(&resp); resp.data = &msg; resp.msg_type = MESSAGE_TASK_EXIT; @@ -517,8 +529,8 @@ _one_step_complete_msg(slurmd_job_t *job, int first, int last) /************* acct stuff ********************/ if(!acct_sent) { jobacct_gather_g_aggregate(step_complete.jobacct, job->jobacct); - jobacct_gather_g_getinfo(step_complete.jobacct, JOBACCT_DATA_TOTAL, - msg.jobacct); + jobacct_gather_g_getinfo(step_complete.jobacct, + JOBACCT_DATA_TOTAL, msg.jobacct); acct_sent = true; } /*********************************************/ @@ -805,7 +817,9 @@ job_manager(slurmd_job_t *job) _send_launch_resp(job, rc); } - if (job->batch) { + if (job->aborted) + info("job_manager exiting with aborted job"); + else if (job->batch) { _batch_finish(job, rc); /* sends batch complete message */ } else if (step_complete.rank > -1) { _wait_for_children_slurmstepd(job); diff --git a/src/slurmd/slurmstepd/pdebug.c b/src/slurmd/slurmstepd/pdebug.c index 59a26f87d..c4a91e3f4 100644 --- a/src/slurmd/slurmstepd/pdebug.c +++ b/src/slurmd/slurmstepd/pdebug.c @@ -47,7 +47,7 @@ pdebug_trace_process(slurmd_job_t *job, pid_t pid) * child's ptrace(PTRACE_TRACEME), then SIGSTOP, and * ptrace(PTRACE_DETACH). This requires a kernel patch, * which you may already have in place for TotalView. 
- * If not, apply the kernel patch in etc/ptrace.patch + * If not, apply the kernel patch in contribs/ptrace.patch */ if (job->task_flags & TASK_PARALLEL_DEBUG) { diff --git a/src/slurmd/slurmstepd/req.c b/src/slurmd/slurmstepd/req.c index 3d85eb0ba..ebfbd430a 100644 --- a/src/slurmd/slurmstepd/req.c +++ b/src/slurmd/slurmstepd/req.c @@ -1,8 +1,8 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/req.c - slurmstepd domain socket request handling - * $Id: req.c 14422 2008-07-02 21:48:19Z jette $ ***************************************************************************** - * Copyright (C) 2005 The Regents of the University of California. + * Copyright (C) 2005-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Christopher Morrone <morrone2@llnl.gov> * LLNL-CODE-402394. @@ -48,16 +48,17 @@ #include <signal.h> #include <time.h> -#include "src/common/xstring.h" -#include "src/common/xmalloc.h" #include "src/common/fd.h" #include "src/common/eio.h" +#include "src/common/parse_time.h" +#include "src/slurmd/common/proctrack.h" #include "src/common/slurm_auth.h" #include "src/common/slurm_jobacct_gather.h" #include "src/common/stepd_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" #include "src/slurmd/slurmd/slurmd.h" -#include "src/slurmd/common/proctrack.h" #include "src/slurmd/slurmstepd/slurmstepd.h" #include "src/slurmd/slurmstepd/slurmstepd_job.h" #include "src/slurmd/slurmstepd/req.h" @@ -690,6 +691,7 @@ _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid) int rc = SLURM_SUCCESS; int errnum = 0; int sig; + static int msg_sent = 0; debug("_handle_signal_container for job %u.%u", job->jobid, job->stepid); @@ -717,34 +719,43 @@ _handle_signal_container(int fd, slurmd_job_t *job, uid_t uid) goto done; } - if (job->nodeid == 0) { - static 
int msg_sent = 0; - char *entity; - if (job->stepid == SLURM_BATCH_SCRIPT) - entity = "JOB"; - else - entity = "STEP"; + if ((job->nodeid == 0) && (msg_sent == 0)) { + time_t now = time(NULL); + char entity[24], time_str[24]; + if (job->stepid == SLURM_BATCH_SCRIPT) { + snprintf(entity, sizeof(entity), "JOB %u", job->jobid); + } else { + snprintf(entity, sizeof(entity), "STEP %u.%u", + job->jobid, job->stepid); + } + slurm_make_time_str(&now, time_str, sizeof(time_str)); + /* Not really errors, * but we want messages displayed by default */ - if (msg_sent) - ; - else if (sig == SIGXCPU) { - error("*** %s CANCELLED DUE TO TIME LIMIT ***", entity); + if (sig == SIG_TIME_LIMIT) { + error("*** %s CANCELLED AT %s DUE TO TIME LIMIT ***", + entity, time_str); msg_sent = 1; } else if (sig == SIG_NODE_FAIL) { - error("*** %s CANCELLED DUE TO NODE FAILURE ***", entity); + error("*** %s CANCELLED AT %s DUE TO NODE FAILURE ***", + entity, time_str); msg_sent = 1; } else if (sig == SIG_FAILURE) { error("*** %s FAILED (non-zero exit code or other " "failure mode) ***", entity); msg_sent = 1; } else if ((sig == SIGTERM) || (sig == SIGKILL)) { - error("*** %s CANCELLED ***", entity); + error("*** %s CANCELLED AT %s ***", entity, time_str); msg_sent = 1; } } - if ((sig == SIG_NODE_FAIL) || (sig == SIG_FAILURE)) + if ((sig == SIG_TIME_LIMIT) || (sig == SIG_NODE_FAIL) || + (sig == SIG_FAILURE)) goto done; + if (sig == SIG_ABORT) { + sig = SIGKILL; + job->aborted = true; + } pthread_mutex_lock(&suspend_mutex); if (suspended && (sig != SIGKILL)) { diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 0f82a07c7..89641b908 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.c - slurmd_job_t routines - * $Id: slurmstepd_job.c 14546 2008-07-17 21:03:59Z jette $ + * 
$Id: slurmstepd_job.c 14753 2008-08-12 22:40:54Z da $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -124,11 +124,13 @@ _valid_gid(struct passwd *pwd, gid_t *gid) } /* Allow user root to use any valid gid */ - if (pwd->pw_uid == 0) + if (pwd->pw_uid == 0) { + pwd->pw_gid = *gid; return 1; - + } for (i = 0; grp->gr_mem[i]; i++) { if (strcmp(pwd->pw_name,grp->gr_mem[i]) == 0) { + pwd->pw_gid = *gid; return 1; } } diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 5c1419d65..2bdbb46bd 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.h slurmd_job_t definition - * $Id: slurmstepd_job.h 14546 2008-07-17 21:03:59Z jette $ + * $Id: slurmstepd_job.h 14702 2008-08-05 22:18:13Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -138,6 +138,7 @@ typedef struct slurmd_job { gid_t gid; /* group ID for job */ int ngids; /* length of the following gids array */ gid_t *gids; /* array of gids for user specified in uid */ + bool aborted; /* true if already aborted */ bool batch; /* true if this is a batch job */ bool run_prolog; /* true if need to run prolog */ bool user_managed_io; diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index a56331026..089171f22 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -42,6 +42,7 @@ #include "src/common/jobacct_common.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_defs.h" +#include "src/common/uid.h" #include "src/slurmdbd/read_config.h" #include "src/slurmdbd/rpc_mgr.h" #include "src/slurmctld/slurmctld.h" @@ -327,7 +328,7 @@ static int _add_accounts(void *db_conn, char *comment = NULL; debug2("DBD_ADD_ACCOUNTS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { acct_user_rec_t user; @@ -382,7 +383,7 @@ static int _add_account_coords(void *db_conn, } debug2("DBD_ADD_ACCOUNT_COORDS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { ListIterator itr = NULL; ListIterator itr2 = NULL; @@ -454,7 +455,7 @@ static int _add_assocs(void *db_conn, goto end_it; } - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { ListIterator itr = NULL; ListIterator itr2 = NULL; @@ -517,7 +518,7 @@ static int _add_clusters(void *db_conn, char *comment = NULL; debug2("DBD_ADD_CLUSTERS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < 
ACCT_ADMIN_SUPER_USER) { comment = "Your user doesn't have privilege to preform this action"; error("%s", comment); @@ -551,7 +552,7 @@ static int _add_qos(void *db_conn, char *comment = NULL; debug2("DBD_ADD_QOS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && (assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_SUPER_USER)) { comment = "Your user doesn't have privilege to preform this action"; @@ -585,7 +586,7 @@ static int _add_users(void *db_conn, dbd_list_msg_t *get_msg = NULL; char *comment = NULL; debug2("DBD_ADD_USERS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { acct_user_rec_t user; @@ -633,7 +634,7 @@ static int _cluster_procs(void *db_conn, int rc = SLURM_SUCCESS; char *comment = NULL; - if (*uid != slurmdbd_conf->slurm_user_id) { + if ((*uid != slurmdbd_conf->slurm_user_id && *uid != 0)) { comment = "DBD_CLUSTER_PROCS message from invalid uid"; error("DBD_CLUSTER_PROCS message from invalid uid %u", *uid); rc = ESLURM_ACCESS_DENIED; @@ -780,9 +781,9 @@ static int _get_jobs(void *db_conn, sacct_params.opt_uid = -1; if(get_jobs_msg->user) { - struct passwd *pw = NULL; - if ((pw=getpwnam(get_jobs_msg->user))) - sacct_params.opt_uid = pw->pw_uid; + uid_t pw_uid = uid_from_string(get_jobs_msg->user); + if (pw_uid != (uid_t) -1) + sacct_params.opt_uid = pw_uid; } list_msg.my_list = jobacct_storage_g_get_jobs( @@ -985,7 +986,7 @@ static int _flush_jobs(void *db_conn, int rc = SLURM_SUCCESS; char *comment = NULL; - if (*uid != slurmdbd_conf->slurm_user_id) { + if ((*uid != slurmdbd_conf->slurm_user_id && *uid != 0)) { comment = "DBD_FLUSH_JOBS message from invalid uid"; error("DBD_FLUSH_JOBS message from invalid uid %u", *uid); rc = ESLURM_ACCESS_DENIED; @@ -1247,7 +1248,7 @@ static int _modify_accounts(void *db_conn, char *comment = NULL; 
debug2("DBD_MODIFY_ACCOUNTS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { comment = "Your user doesn't have privilege to preform this action"; error("%s", comment); @@ -1361,7 +1362,7 @@ static int _modify_clusters(void *db_conn, dbd_modify_msg_t *get_msg = NULL; char *comment = NULL; - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_SUPER_USER) { comment = "Your user doesn't have privilege to preform this action"; @@ -1421,11 +1422,37 @@ static int _modify_users(void *db_conn, int rc = SLURM_SUCCESS; dbd_modify_msg_t *get_msg = NULL; char *comment = NULL; - + int same_user = 0; + int admin_level = assoc_mgr_get_admin_level(db_conn, *uid); + acct_user_cond_t *user_cond = NULL; + acct_user_rec_t *user_rec = NULL; + debug2("DBD_MODIFY_USERS: called"); - if(*uid != slurmdbd_conf->slurm_user_id - && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { + if (slurmdbd_unpack_modify_msg(DBD_MODIFY_USERS, &get_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_MODIFY_USERS message"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, + comment, DBD_MODIFY_USERS); + return SLURM_ERROR; + } + + user_cond = (acct_user_cond_t *)get_msg->cond; + user_rec = (acct_user_rec_t *)get_msg->rec; + + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) + && admin_level < ACCT_ADMIN_OPERATOR) { + if(user_cond && user_cond->assoc_cond + && user_cond->assoc_cond->user_list + && (list_count(user_cond->assoc_cond->user_list) == 1)) { + uid_t pw_uid = uid_from_string( + list_peek(user_cond->assoc_cond->user_list)); + if (pw_uid == *uid) { + same_user = 1; + goto is_same_user; + } + } comment = "Your user doesn't have privilege to preform this action"; error("%s", comment); *out_buffer = 
make_dbd_rc_msg(ESLURM_ACCESS_DENIED, @@ -1434,26 +1461,33 @@ static int _modify_users(void *db_conn, return ESLURM_ACCESS_DENIED; } - if (slurmdbd_unpack_modify_msg(DBD_MODIFY_USERS, &get_msg, in_buffer) != - SLURM_SUCCESS) { - comment = "Failed to unpack DBD_MODIFY_USERS message"; - error("%s", comment); - *out_buffer = make_dbd_rc_msg(SLURM_ERROR, - comment, DBD_MODIFY_USERS); - return SLURM_ERROR; +is_same_user: + + /* same_user can only alter the default account nothing else */ + if(same_user) { + /* If we add anything else here for the user we will + * need to document it + */ + if((user_rec->admin_level != ACCT_ADMIN_NOTSET) + || (user_rec->qos_list)) { + comment = "You can only change your own default account, nothing else"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(ESLURM_ACCESS_DENIED, + comment, DBD_MODIFY_USERS); + + return ESLURM_ACCESS_DENIED; + } } - if(((acct_user_rec_t *)get_msg->rec)->admin_level != ACCT_ADMIN_NOTSET - && *uid != slurmdbd_conf->slurm_user_id - && assoc_mgr_get_admin_level(db_conn, *uid) - < ((acct_user_rec_t *)get_msg->rec)->admin_level) { + if((user_rec->admin_level != ACCT_ADMIN_NOTSET) + && (*uid != slurmdbd_conf->slurm_user_id && *uid != 0) + && (admin_level < user_rec->admin_level)) { comment = "You have to be the same or higher admin level to change another persons"; - ((acct_user_rec_t *)get_msg->rec)->admin_level = - ACCT_ADMIN_NOTSET; + user_rec->admin_level = ACCT_ADMIN_NOTSET; } if(!(list_msg.my_list = acct_storage_g_modify_users( - db_conn, *uid, get_msg->cond, get_msg->rec))) { + db_conn, *uid, user_cond, user_rec))) { if(errno == ESLURM_ACCESS_DENIED) { comment = "Your user doesn't have privilege to preform this action"; rc = ESLURM_ACCESS_DENIED; @@ -1641,7 +1675,7 @@ static int _remove_accounts(void *db_conn, debug2("DBD_REMOVE_ACCOUNTS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < 
ACCT_ADMIN_OPERATOR) { comment = "Your user doesn't have privilege to preform this action"; error("%s", comment); @@ -1813,7 +1847,7 @@ static int _remove_clusters(void *db_conn, debug2("DBD_REMOVE_CLUSTERS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_SUPER_USER) { comment = "Your user doesn't have privilege to preform this action"; @@ -1874,7 +1908,7 @@ static int _remove_qos(void *db_conn, debug2("DBD_REMOVE_QOS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_SUPER_USER) { comment = "Your user doesn't have privilege to preform this action"; @@ -1935,7 +1969,7 @@ static int _remove_users(void *db_conn, debug2("DBD_REMOVE_USERS: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { comment = "Your user doesn't have privilege to preform this action"; error("%s", comment); @@ -1994,7 +2028,7 @@ static int _roll_usage(void *db_conn, info("DBD_ROLL_USAGE: called"); - if(*uid != slurmdbd_conf->slurm_user_id + if((*uid != slurmdbd_conf->slurm_user_id && *uid != 0) && assoc_mgr_get_admin_level(db_conn, *uid) < ACCT_ADMIN_OPERATOR) { comment = "Your user doesn't have privilege to preform this action"; error("%s", comment); diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c index e14489454..83f1c5873 100644 --- a/src/slurmdbd/read_config.c +++ b/src/slurmdbd/read_config.c @@ -48,6 +48,7 @@ #include "src/common/log.h" #include "src/common/parse_config.h" #include "src/common/read_config.h" +#include "src/common/uid.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" #include "src/slurmdbd/read_config.h" @@ -153,7 +154,8 @@ extern int read_slurmdbd_conf(void) } 
s_p_get_uint16(&slurmdbd_conf->archive_age, "ArchiveAge", tbl); - s_p_get_string(&slurmdbd_conf->archive_script, "ArchiveScript", tbl); + s_p_get_string(&slurmdbd_conf->archive_script, "ArchiveScript", + tbl); s_p_get_string(&slurmdbd_conf->auth_info, "AuthInfo", tbl); s_p_get_string(&slurmdbd_conf->auth_type, "AuthType", tbl); s_p_get_string(&slurmdbd_conf->dbd_host, "DbdHost", tbl); @@ -174,7 +176,8 @@ extern int read_slurmdbd_conf(void) s_p_get_string(&slurmdbd_conf->plugindir, "PluginDir", tbl); s_p_get_string(&slurmdbd_conf->slurm_user_name, "SlurmUser", tbl); - if (!s_p_get_uint16(&slurmdbd_conf->step_purge, "StepPurge", tbl)) + if (!s_p_get_uint16(&slurmdbd_conf->step_purge, "StepPurge", + tbl)) slurmdbd_conf->step_purge = DEFAULT_SLURMDBD_STEP_PURGE; s_p_get_string(&slurmdbd_conf->storage_host, "StorageHost", tbl); @@ -196,7 +199,8 @@ extern int read_slurmdbd_conf(void) if (slurmdbd_conf->auth_type == NULL) slurmdbd_conf->auth_type = xstrdup(DEFAULT_SLURMDBD_AUTHTYPE); if (slurmdbd_conf->dbd_host == NULL) { - error("slurmdbd.conf lacks DbdHost parameter, using 'localhost'"); + error("slurmdbd.conf lacks DbdHost parameter, " + "using 'localhost'"); slurmdbd_conf->dbd_host = xstrdup("localhost"); } if (slurmdbd_conf->dbd_addr == NULL) @@ -208,13 +212,12 @@ extern int read_slurmdbd_conf(void) if(slurmdbd_conf->plugindir == NULL) slurmdbd_conf->plugindir = xstrdup(default_plugin_path); if (slurmdbd_conf->slurm_user_name) { - struct passwd *slurm_passwd; - slurm_passwd = getpwnam(slurmdbd_conf->slurm_user_name); - if (slurm_passwd == NULL) { + uid_t pw_uid = uid_from_string(slurmdbd_conf->slurm_user_name); + if (pw_uid == (uid_t) -1) { fatal("Invalid user for SlurmUser %s, ignored", slurmdbd_conf->slurm_user_name); } else - slurmdbd_conf->slurm_user_id = slurm_passwd->pw_uid; + slurmdbd_conf->slurm_user_id = pw_uid; } else { slurmdbd_conf->slurm_user_name = xstrdup("root"); slurmdbd_conf->slurm_user_id = 0; diff --git a/src/smap/job_functions.c 
b/src/smap/job_functions.c index 9a49b3729..db22f3a14 100644 --- a/src/smap/job_functions.c +++ b/src/smap/job_functions.c @@ -253,7 +253,7 @@ static int _print_text_job(job_info_t * job_ptr) char time_buf[20]; char tmp_cnt[8]; uint32_t node_cnt = 0; - char *ionodes = NULL; + char *ionodes = NULL, *uname; time_t now_time = time(NULL); #ifdef HAVE_BG @@ -294,9 +294,10 @@ static int _print_text_job(job_info_t * job_ptr) SELECT_PRINT_BG_ID)); main_xcord += 18; #endif + uname = uid_to_string((uid_t) job_ptr->user_id); mvwprintw(text_win, main_ycord, - main_xcord, "%.8s", - uid_to_string((uid_t) job_ptr->user_id)); + main_xcord, "%.8s", uname); + xfree(uname); main_xcord += 9; mvwprintw(text_win, main_ycord, main_xcord, "%.9s", job_ptr->name); @@ -367,7 +368,9 @@ static int _print_text_job(job_info_t * job_ptr) sizeof(time_buf), SELECT_PRINT_BG_ID)); #endif - printf("%8.8s ", uid_to_string((uid_t) job_ptr->user_id)); + uname = uid_to_string((uid_t) job_ptr->user_id); + printf("%8.8s ", uname); + xfree(uname); printf("%6.6s ", job_ptr->name); printf("%2.2s ", job_state_string_compact(job_ptr->job_state)); diff --git a/src/squeue/print.c b/src/squeue/print.c index 74b010636..6b4a016ca 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -370,9 +370,11 @@ int _print_job_user_name(job_info_t * job, int width, bool right, char* suffix) { if (job == NULL) /* Print the Header instead */ _print_str("USER", width, right, true); - else - _print_str(uid_to_string((uid_t) job->user_id), width, - right, true); + else { + char *uname = uid_to_string((uid_t) job->user_id); + _print_str(uname, width, right, true); + xfree(uname); + } if (suffix) printf("%s", suffix); return SLURM_SUCCESS; @@ -1171,9 +1173,11 @@ int _print_step_user_name(job_step_info_t * step, int width, bool right, { if (step == NULL) /* Print the Header instead */ _print_str("USER", width, right, true); - else - _print_str(uid_to_string((uid_t) step->user_id), width, - right, true); + else { + char *uname = 
uid_to_string((uid_t) step->user_id); + _print_str(uname, width, right, true); + xfree(uname); + } if (suffix) printf("%s", suffix); return SLURM_SUCCESS; diff --git a/src/squeue/sort.c b/src/squeue/sort.c index 2a0bce637..ad66c2dcd 100644 --- a/src/squeue/sort.c +++ b/src/squeue/sort.c @@ -609,6 +609,8 @@ static int _sort_job_by_user_name(void *void1, void *void2) name1 = uid_to_string((uid_t) job1->user_id); name2 = uid_to_string((uid_t) job2->user_id); diff = strcmp(name1, name2); + xfree(name1); + xfree(name2); if (reverse_order) diff = -diff; @@ -755,6 +757,8 @@ static int _sort_step_by_user_name(void *void1, void *void2) name1 = uid_to_string((uid_t) step1->user_id); name2 = uid_to_string((uid_t) step2->user_id); diff = strcmp(name1, name2); + xfree(name1); + xfree(name2); if (reverse_order) diff = -diff; diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 8c39260dc..752eb59e7 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -265,6 +265,7 @@ extern int cluster_utilization(int argc, char *argv[]) List cluster_list = NULL; List format_list = list_create(slurm_destroy_char); + int field_count = 0; print_fields_list = list_create(destroy_print_field); @@ -285,11 +286,14 @@ extern int cluster_utilization(int argc, char *argv[]) print_fields_header(print_fields_list); + field_count = list_count(print_fields_list); + while((cluster = list_next(itr))) { cluster_accounting_rec_t *accting = NULL; cluster_accounting_rec_t total_acct; uint64_t total_reported = 0; uint64_t local_total_time = 0; + int curr_inx = 1; if(!cluster->accounting_list || !list_count(cluster->accounting_list)) @@ -317,45 +321,62 @@ extern int cluster_utilization(int argc, char *argv[]) switch(field->type) { case PRINT_CLUSTER_NAME: field->print_routine(field, - cluster->name); + cluster->name, + (curr_inx == + field_count)); break; case PRINT_CLUSTER_CPUS: field->print_routine(field, - total_acct.cpu_count); + total_acct.cpu_count, + 
(curr_inx == + field_count)); break; case PRINT_CLUSTER_ACPU: field->print_routine(field, total_acct.alloc_secs, - total_reported); + total_reported, + (curr_inx == + field_count)); break; case PRINT_CLUSTER_DCPU: field->print_routine(field, total_acct.down_secs, - total_reported); + total_reported, + (curr_inx == + field_count)); break; case PRINT_CLUSTER_ICPU: field->print_routine(field, total_acct.idle_secs, - total_reported); + total_reported, + (curr_inx == + field_count)); break; case PRINT_CLUSTER_RCPU: field->print_routine(field, total_acct.resv_secs, - total_reported); + total_reported, + (curr_inx == + field_count)); break; case PRINT_CLUSTER_OCPU: field->print_routine(field, total_acct.over_secs, - total_reported); + total_reported, + (curr_inx == + field_count)); break; case PRINT_CLUSTER_TOTAL: field->print_routine(field, total_reported, - local_total_time); + local_total_time, + (curr_inx == + field_count)); break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); diff --git a/src/sreport/common.c b/src/sreport/common.c index 9df97591b..256306c00 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -40,14 +40,18 @@ #include "sreport.h" extern void sreport_print_time(print_field_t *field, - uint64_t value, uint64_t total_time) + uint64_t value, uint64_t total_time, int last) { if(!total_time) total_time = 1; /* (value == unset) || (value == cleared) */ if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + ; + else if(print_fields_parsable_print) printf("|"); else printf("%-*s ", field->len, " "); @@ -75,7 +79,11 @@ extern void sreport_print_time(print_field_t *field, break; } - if(print_fields_parsable_print) + if(print_fields_parsable_print + == PRINT_FIELDS_PARSABLE_NO_ENDING + && last) + printf("%s", output); + else if(print_fields_parsable_print) printf("%s|", output); else printf("%*s ", field->len, 
output); diff --git a/src/sreport/job_reports.c b/src/sreport/job_reports.c index 654ca7b02..d8d024530 100644 --- a/src/sreport/job_reports.c +++ b/src/sreport/job_reports.c @@ -714,16 +714,18 @@ no_assocs: while((cluster_group = list_next(cluster_itr))) { acct_itr = list_iterator_create(cluster_group->acct_list); while((acct_group = list_next(acct_itr))) { + while((field = list_next(itr))) { switch(field->type) { case PRINT_JOB_CLUSTER: field->print_routine( field, - cluster_group->cluster); + cluster_group->cluster, 0); break; case PRINT_JOB_ACCOUNT: field->print_routine(field, - acct_group->acct); + acct_group->acct, + 0); break; default: break; @@ -748,7 +750,7 @@ no_assocs: list_iterator_destroy(local_itr); total_field.print_routine(&total_field, acct_group->cpu_secs, - cluster_group->cpu_secs); + cluster_group->cpu_secs, 1); printf("\n"); } diff --git a/src/sreport/sreport.c b/src/sreport/sreport.c index 08715ad0e..74e7153e8 100644 --- a/src/sreport/sreport.c +++ b/src/sreport/sreport.c @@ -80,6 +80,7 @@ main (int argc, char *argv[]) {"immediate",0, 0, 'i'}, {"no_header", 0, 0, 'n'}, {"parsable", 0, 0, 'p'}, + {"parsable2", 0, 0, 'P'}, {"quiet", 0, 0, 'q'}, {"usage", 0, 0, 'h'}, {"verbose", 0, 0, 'v'}, @@ -94,7 +95,7 @@ main (int argc, char *argv[]) quiet_flag = 0; log_init("sreport", opts, SYSLOG_FACILITY_DAEMON, NULL); - while((opt_char = getopt_long(argc, argv, "ahnpqt:vV", + while((opt_char = getopt_long(argc, argv, "ahnpPqt:vV", long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': @@ -113,7 +114,12 @@ main (int argc, char *argv[]) print_fields_have_header = 0; break; case (int)'p': - print_fields_parsable_print = 1; + print_fields_parsable_print = + PRINT_FIELDS_PARSABLE_ENDING; + break; + case (int)'P': + print_fields_parsable_print = + PRINT_FIELDS_PARSABLE_NO_ENDING; break; case (int)'q': quiet_flag = 1; @@ -494,7 +500,8 @@ sreport [<OPTION>] [<COMMAND>] \n\ -h or --help: equivalent to \"help\" command \n\ -n or --no_header: 
equivalent to \"no_header\" command \n\ -q or --quiet: equivalent to \"quiet\" command \n\ - -p or --parsable: equivalent to \"parsable\" command \n\ + -p or --parsable: output will be '|' delimited with a '|' at the end \n\ + -P or --parsable2: output will be '|' delimited without a '|' at the end\n\ -v or --verbose: equivalent to \"verbose\" command \n\ -V or --version: equivalent to \"version\" command \n\ \n\ @@ -505,7 +512,9 @@ sreport [<OPTION>] [<COMMAND>] \n\ Valid <COMMAND> values are: \n\ exit terminate sreport \n\ help print this description of use. \n\ - quiet print no messages other than error messages. \n\ + parsable output will be | delimited with an ending '|'\n\ + parsable2 output will be | delimited without an ending '|'\n\ + quiet print no messages other than error messages. \n\ quit terminate this command. \n\ verbose enable detailed logging. \n\ version display tool version number. \n\ diff --git a/src/sreport/sreport.h b/src/sreport/sreport.h index d8f25640d..bd450fb78 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -103,7 +103,7 @@ extern uint32_t my_uid; extern int all_clusters_flag; extern void sreport_print_time(print_field_t *field, - uint64_t value, uint64_t total_time); + uint64_t value, uint64_t total_time, int last); extern int parse_option_end(char *option); extern char *strip_quotes(char *option, int *increased); extern int set_start_end_time(time_t *start, time_t *end); diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index a1496a48f..4e2347787 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -282,6 +282,7 @@ extern int user_top(int argc, char *argv[]) local_user_rec_t *local_user = NULL; local_cluster_rec_t *local_cluster = NULL; print_field_t *field = NULL; + int field_count = 0; print_fields_list = list_create(destroy_print_field); @@ -423,6 +424,8 @@ extern int user_top(int argc, char *argv[]) itr2 = list_iterator_create(print_fields_list); 
print_fields_header(print_fields_list); + field_count = list_count(print_fields_list); + list_iterator_reset(cluster_itr); while((local_cluster = list_next(cluster_itr))) { list_sort(local_cluster->user_list, (ListCmpF)_sort_user_dec); @@ -430,6 +433,7 @@ extern int user_top(int argc, char *argv[]) itr = list_iterator_create(local_cluster->user_list); while((local_user = list_next(itr))) { int count = 0; + int curr_inx = 1; while((field = list_next(itr2))) { char *tmp_char = NULL; struct passwd *pwd = NULL; @@ -449,17 +453,21 @@ extern int user_top(int argc, char *argv[]) list_iterator_destroy(itr3); field->print_routine( field, - tmp_char); + tmp_char, + (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_USER_CLUSTER: field->print_routine( field, - local_cluster->name); + local_cluster->name, + (curr_inx == field_count)); break; case PRINT_USER_LOGIN: field->print_routine(field, - local_user->name); + local_user->name, + (curr_inx == + field_count)); break; case PRINT_USER_PROPER: pwd = getpwnam(local_user->name); @@ -467,21 +475,25 @@ extern int user_top(int argc, char *argv[]) tmp_char = strtok(pwd->pw_gecos, ","); if(!tmp_char) - tmp_char = + tmp_char = pwd->pw_gecos; } field->print_routine(field, - tmp_char); + tmp_char, + (curr_inx == + field_count)); break; case PRINT_USER_USED: field->print_routine( field, local_user->cpu_secs, - local_cluster->cpu_secs); + local_cluster->cpu_secs, + (curr_inx == field_count)); break; default: break; } + curr_inx++; } list_iterator_reset(itr2); printf("\n"); diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 505f6b697..a87f3ce25 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.c - srun functions for managing node allocations - * $Id: allocate.c 14570 2008-07-18 22:06:26Z da $ + * $Id: allocate.c 14684 2008-08-01 19:57:23Z jette $ 
***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). diff --git a/src/srun/opt.c b/src/srun/opt.c index 94bfff45f..dd6eddef0 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -565,7 +565,6 @@ static void _opt_default() char buf[MAXPATHLEN + 1]; struct passwd *pw; int i; - char hostname[64]; if ((pw = getpwuid(getuid())) != NULL) { strncpy(opt.user, pw->pw_name, MAX_USERNAME); @@ -690,8 +689,7 @@ static void _opt_default() opt.task_prolog = NULL; opt.task_epilog = NULL; - gethostname_short(hostname, sizeof(hostname)); - opt.ctrl_comm_ifhn = xstrdup(hostname); + opt.ctrl_comm_ifhn = NULL; /* * Reset some default values if running under a parallel debugger diff --git a/src/srun/srun.c b/src/srun/srun.c index b02333867..c23266235 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -72,6 +72,7 @@ #include "src/common/fd.h" +#include "src/common/hostlist.h" #include "src/common/log.h" #include "src/common/slurm_protocol_api.h" #include "src/common/switch.h" @@ -94,6 +95,12 @@ #include "src/api/pmi_server.h" #include "src/api/step_launch.h" +#if defined (HAVE_DECL_STRSIGNAL) && !HAVE_DECL_STRSIGNAL +# ifndef strsignal + extern char *strsignal(int); +# endif +#endif /* defined HAVE_DECL_STRSIGNAL && !HAVE_DECL_STRSIGNAL */ + #define MAX_RETRIES 20 #define MAX_ENTRIES 50 @@ -910,41 +917,83 @@ _handle_max_wait(int signo) _terminate_job_step(job->step_ctx); } +static char * +_taskids_to_nodelist(bitstr_t *tasks_exited) +{ + int i; + char *hostname, *hostlist_str; + hostlist_t hostlist; + job_step_create_response_msg_t *step_resp; + slurm_step_layout_t *step_layout; + + if (!job->step_ctx) { + error("No step_ctx"); + hostlist_str = xstrdup("Unknown"); + return hostlist_str; + } + + slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_RESP, &step_resp); + step_layout = step_resp->step_layout; + hostlist = 
hostlist_create(NULL); + for (i=0; i<job->ntasks; i++) { + if (!bit_test(tasks_exited, i)) + continue; + hostname = slurm_step_layout_host_name(step_layout, i); + hostlist_push(hostlist, hostname); + } + hostlist_uniq(hostlist); + hostlist_str = xmalloc(2048); + hostlist_ranged_string(hostlist, 2048, hostlist_str); + hostlist_destroy(hostlist); + return hostlist_str; +} + static void _task_finish(task_exit_msg_t *msg) { + bitstr_t *tasks_exited = NULL; + char buf[2048], *core_str = "", *msg_str, *node_list = NULL; static bool first_done = true; static bool first_error = true; int rc = 0; int i; - verbose("%d tasks finished (rc=%u)", + verbose("%u tasks finished (rc=%u)", msg->num_tasks, msg->return_code); + tasks_exited = bit_alloc(job->ntasks); + for (i=0; i<msg->num_tasks; i++) + bit_set(tasks_exited, msg->task_id_list[i]); + bit_fmt(buf, sizeof(buf), tasks_exited); if (WIFEXITED(msg->return_code)) { rc = WEXITSTATUS(msg->return_code); if (rc != 0) { - for (i = 0; i < msg->num_tasks; i++) { - error("task %u exited with exit code %d", - msg->task_id_list[i], rc); - bit_set(task_state.finish_abnormal, - msg->task_id_list[i]); - } + bit_or(task_state.finish_abnormal, tasks_exited); + node_list = _taskids_to_nodelist(tasks_exited); + error("%s: task %s: Exited with exit code %d", + node_list, buf, rc); } else { - for (i = 0; i < msg->num_tasks; i++) { - bit_set(task_state.finish_normal, - msg->task_id_list[i]); - } + bit_or(task_state.finish_normal, tasks_exited); + verbose("task %s: Completed", buf); } } else if (WIFSIGNALED(msg->return_code)) { - for (i = 0; i < msg->num_tasks; i++) { - verbose("task %u killed by signal %d", - msg->task_id_list[i], - WTERMSIG(msg->return_code)); - bit_set(task_state.finish_abnormal, - msg->task_id_list[i]); - } + bit_or(task_state.finish_abnormal, tasks_exited); rc = 1; + msg_str = strsignal(WTERMSIG(msg->return_code)); +#ifdef WCOREDUMP + if (WCOREDUMP(msg->return_code)) + core_str = " (core dumped)"; +#endif + node_list = 
_taskids_to_nodelist(tasks_exited); + if (job->state >= SRUN_JOB_CANCELLED) { + verbose("%s: task %s: %s%s", + node_list, buf, msg_str, core_str); + } else { + error("%s: task %s: %s%s", + node_list, buf, msg_str, core_str); + } } + xfree(node_list); + bit_free(tasks_exited); global_rc = MAX(global_rc, rc); if (first_error && rc > 0 && opt.kill_bad_exit) { @@ -994,7 +1043,7 @@ _task_state_struct_print(void) bit_copybits(tmp, task_state.finish_abnormal); bit_and(tmp, not_seen); bit_fmt(buf, BUFSIZ, tmp); - info("task%s: exited abnormally", buf); + info("task %s: exited abnormally", buf); bit_or(seen, tmp); bit_copybits(not_seen, seen); bit_not(not_seen); @@ -1004,7 +1053,7 @@ _task_state_struct_print(void) bit_copybits(tmp, task_state.finish_normal); bit_and(tmp, not_seen); bit_fmt(buf, BUFSIZ, tmp); - info("task%s: exited", buf); + info("task %s: exited", buf); bit_or(seen, tmp); bit_copybits(not_seen, seen); bit_not(not_seen); @@ -1014,7 +1063,7 @@ _task_state_struct_print(void) bit_copybits(tmp, task_state.start_failure); bit_and(tmp, not_seen); bit_fmt(buf, BUFSIZ, tmp); - info("task%s: failed to start", buf); + info("task %s: failed to start", buf); bit_or(seen, tmp); bit_copybits(not_seen, seen); bit_not(not_seen); @@ -1024,7 +1073,7 @@ _task_state_struct_print(void) bit_copybits(tmp, task_state.start_success); bit_and(tmp, not_seen); bit_fmt(buf, BUFSIZ, tmp); - info("task%s: running", buf); + info("task %s: running", buf); bit_or(seen, tmp); bit_copybits(not_seen, seen); bit_not(not_seen); @@ -1039,7 +1088,7 @@ _task_state_struct_free(void) bit_free(task_state.finish_normal); bit_free(task_state.finish_abnormal); } - + static void _handle_intr() { static time_t last_intr = 0; @@ -1051,7 +1100,10 @@ static void _handle_intr() } if (((time(NULL) - last_intr) > 1) && !opt.disable_status) { - info("interrupt (one more within 1 sec to abort)"); + if (job->state < SRUN_JOB_FORCETERM) + info("interrupt (one more within 1 sec to abort)"); + else + info("interrupt 
(abort already in progress)"); _task_state_struct_print(); last_intr = time(NULL); } else { /* second Ctrl-C in half as many seconds */ @@ -1068,7 +1120,7 @@ static void _handle_intr() job->jobid, job->stepid); last_intr_sent = time(NULL); slurm_step_launch_fwd_signal(job->step_ctx, SIGINT); - + slurm_step_launch_abort(job->step_ctx); } else { job_force_termination(job); slurm_step_launch_abort(job->step_ctx); diff --git a/src/srun/srun_job.c b/src/srun/srun_job.c index 10d9daf4a..48f7d5a68 100644 --- a/src/srun/srun_job.c +++ b/src/srun/srun_job.c @@ -1,8 +1,8 @@ /****************************************************************************\ * srun_job.c - job data structure creation functions - * $Id$ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona@llnl.gov>. * LLNL-CODE-402394. 
@@ -388,8 +388,25 @@ job_state(srun_job_t *job) void job_force_termination(srun_job_t *job) { - info ("forcing job termination"); - update_job_state(job, SRUN_JOB_FORCETERM); + static int kill_sent = 0; + static time_t last_msg = 0; + + if (kill_sent == 0) { + info("forcing job termination"); + /* Sends SIGKILL to tasks directly */ + update_job_state(job, SRUN_JOB_FORCETERM); + } else { + time_t now = time(NULL); + if (last_msg != now) { + info("job abort in progress"); + last_msg = now; + } + if (kill_sent == 1) { + /* Try sending SIGKILL through slurmctld */ + slurm_kill_job_step(job->jobid, job->stepid, SIGKILL); + } + } + kill_sent++; } static inline int diff --git a/src/sstat/sstat.c b/src/sstat/sstat.c index 33c19cc51..c7d47f452 100644 --- a/src/sstat/sstat.c +++ b/src/sstat/sstat.c @@ -117,7 +117,7 @@ int _sstat_query(slurm_step_layout_t *step_layout, uint32_t job_id, msg.msg_type = MESSAGE_STAT_JOBACCT; msg.data = &r; - ret_list = slurm_send_recv_msgs(step_layout->node_list, &msg, 0); + ret_list = slurm_send_recv_msgs(step_layout->node_list, &msg, 0, false); if (!ret_list) { error("got an error no list returned"); goto cleanup; diff --git a/src/sview/job_info.c b/src/sview/job_info.c index 7155dc4b4..9b5e3b9aa 100644 --- a/src/sview/job_info.c +++ b/src/sview/job_info.c @@ -1028,7 +1028,7 @@ static void _layout_job_record(GtkTreeView *treeview, sview_job_info_t *sview_job_info_ptr, int update) { - char *nodes = NULL; + char *nodes = NULL, *uname = NULL; char tmp_char[50]; time_t now_time = time(NULL); job_info_t *job_ptr = sview_job_info_ptr->job_ptr; @@ -1128,10 +1128,11 @@ static void _layout_job_record(GtkTreeView *treeview, sizeof(tmp_char), SELECT_PRINT_BG_ID)); #endif + uname = uid_to_string((uid_t)job_ptr->user_id); add_display_treestore_line(update, treestore, &iter, find_col_name(display_data_job, - SORTID_USER), - uid_to_string((uid_t)job_ptr->user_id)); + SORTID_USER), uname); + xfree(uname); group_info = getgrgid((gid_t) job_ptr->group_id ); if 
( group_info && group_info->gr_name[ 0 ] ) { snprintf(tmp_char, sizeof(tmp_char), "%s", @@ -1419,7 +1420,7 @@ static void _update_job_record(sview_job_info_t *sview_job_info_ptr, GtkTreeStore *treestore, GtkTreeIter *iter) { - char *nodes = NULL; + char *nodes = NULL, *uname = NULL; char tmp_char[50]; time_t now_time = time(NULL); GtkTreeIter step_iter; @@ -1558,9 +1559,10 @@ static void _update_job_record(sview_job_info_t *sview_job_info_ptr, SELECT_PRINT_RAMDISK_IMAGE), -1); #endif + uname = uid_to_string((uid_t)job_ptr->user_id); gtk_tree_store_set(treestore, iter, - SORTID_USER, - uid_to_string((uid_t)job_ptr->user_id), -1); + SORTID_USER, uname, -1); + xfree(uname); group_info = getgrgid((gid_t) job_ptr->group_id ); if ( group_info && group_info->gr_name[ 0 ] ) { snprintf(tmp_char, sizeof(tmp_char), "%s", @@ -1718,7 +1720,7 @@ static void _layout_step_record(GtkTreeView *treeview, job_step_info_t *step_ptr, int update) { - char *nodes = NULL; + char *nodes = NULL, *uname; char tmp_char[50]; char tmp_time[50]; time_t now_time = time(NULL); @@ -1782,10 +1784,11 @@ static void _layout_step_record(GtkTreeView *treeview, /* sizeof(tmp_char), */ /* SELECT_PRINT_BG_ID)); */ /* #endif */ + uname = uid_to_string((uid_t)step_ptr->user_id); add_display_treestore_line(update, treestore, &iter, find_col_name(display_data_job, - SORTID_USER), - uid_to_string((uid_t)step_ptr->user_id)); + SORTID_USER), uname); + xfree(uname); add_display_treestore_line(update, treestore, &iter, find_col_name(display_data_job, SORTID_NAME), @@ -1813,7 +1816,7 @@ static void _update_step_record(job_step_info_t *step_ptr, GtkTreeStore *treestore, GtkTreeIter *iter) { - char *nodes = NULL; + char *nodes = NULL, *uname = NULL; char tmp_char[50]; char tmp_time[50]; time_t now_time = time(NULL); @@ -1862,9 +1865,10 @@ static void _update_step_record(job_step_info_t *step_ptr, /* sizeof(tmp_char), */ /* SELECT_PRINT_BG_ID), -1); */ /* #endif */ + uname = uid_to_string((uid_t)step_ptr->user_id); 
gtk_tree_store_set(treestore, iter, - SORTID_USER, - uid_to_string((uid_t)step_ptr->user_id), -1); + SORTID_USER, uname, -1); + xfree(uname); gtk_tree_store_set(treestore, iter, SORTID_NAME, step_ptr->name, -1); @@ -2720,10 +2724,10 @@ extern void specific_info_job(popup_info_t *popup_win) sview_job_info_t *sview_job_info_ptr = NULL; job_info_t *job_ptr = NULL; ListIterator itr = NULL; - char name[30]; + char name[30], *uname = NULL; char *host = NULL, *host2 = NULL; hostlist_t hostlist = NULL; - int found = 0; + int found = 0, name_diff; if(!spec_info->display_widget) setup_popup_info(popup_win, display_data_job, SORTID_CNT); @@ -2863,8 +2867,11 @@ display_it: case SEARCH_JOB_USER: if(!search_info->gchar_data) continue; - if(strcmp(uid_to_string(job_ptr->user_id), - search_info->gchar_data)) + uname = uid_to_string(job_ptr->user_id); + name_diff = strcmp(uname, + search_info->gchar_data); + xfree(uname); + if(name_diff) continue; break; case SEARCH_JOB_STATE: diff --git a/testsuite/expect/README b/testsuite/expect/README index cfb33bea5..6d2bbda32 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -127,7 +127,7 @@ test1.31 Verify that SLURM directed environment variables are processed: SLURM_STDOUTMODE. test1.32 Test of srun signal forwarding test1.33 Test of srun application exit code reporting -test1.34 REMOVED +test1.34 Test of task signal abort message test1.35 Test of batch job with multiple concurrent job steps test1.36 Test parallel launch of srun (e.g. "srun srun hostname") test1.37 REMOVED @@ -291,6 +291,8 @@ test7.9 Test that no files are open in spawned tasks (except stdin, stdout, and stderr) to insure successful checkpoint/restart. test7.10 Test if we can trick SLURM into using the wrong user ID through an LD_PRELOAD option. +test7.11 Test of SPANK plugin. + test8.# Test of Blue Gene specific functionality. 
================================================= @@ -302,6 +304,7 @@ test8.5 Confirm we can make a 32, 128, and 512 cnode block. test8.6 Stress test Dynamic mode block creation. test8.7 Test of Blue Gene scheduling with sched/wik2 plugin. + test9.# System stress testing. Exercises all commands and daemons. ===================================================================== test9.1 Stress test of stdin broadcast. @@ -504,3 +507,13 @@ test21.6 sacctmgr add multiple clusters test21.7 sacctmgr list clusters test21.8 sacctmgr modify a cluster test21.9 sacctmgr modify multiple clusters +test21.10 sacctmgr add an account +test21.11 sacctmgr add multiple accounts +test21.12 sacctmgr list multiple accounts +test21.13 sacctmgr modify account +test21.14 sacctmgr modify multiple accounts +test21.15 sacctmgr add an user +test21.16 sacctmgr add and list multiple users +test21.17 sacctmgr modify user +test21.18 sacctmgr modify multiple users +test21.19 sacctmgr add and delete coordinator diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 05204c455..0126835ee 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -12,7 +12,7 @@ # ############################################################################ # Copyright (C) 2008 Lawrence Livermore National Security. -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
# Written by Morris Jette <jette1@llnl.gov> # Additions by Joseph Donaghy <donaghy1@llnl.gov> @@ -69,7 +69,7 @@ cset smap "${slurm_dir}/bin/smap" cset squeue "${slurm_dir}/bin/squeue" cset srun "${slurm_dir}/bin/srun" cset sreport "${slurm_dir}/bin/sreport" -cset sstat "${slurm_dir}/bin/sstat" +cset sstat "${slurm_dir}/bin/sstat" cset strigger "${slurm_dir}/bin/strigger" cset pbsnodes "${slurm_dir}/bin/pbsnodes" @@ -118,7 +118,7 @@ cset bin_env "env" cset bin_file "file" cset bin_id "id" cset bin_grep "grep" -cset bin_hostname "hostname" +#cset bin_hostname "hostname" #Don't user $bin_hostname use $bin_printenv SLURMD_NODENAME cset bin_kill "kill" cset bin_make "make" cset bin_od "od" @@ -345,8 +345,10 @@ proc print_header { test_id } { # # Proc: wait_for_file # -# Purpose: Wait for the specified file to exist. This delay -# provides time for NFS files to be propogated. The +# Purpose: Wait for the specified file to exist and have a +# non-zero size. Note that if JobFileAppend=0 is +# configured, a file can exist and be purged then +# be re-created. # # Returns: A non-zero return code indicates a failure. 
# @@ -358,14 +360,8 @@ proc wait_for_file { file_name } { global bin_sleep max_file_delay for {set my_delay 0} {$my_delay <= $max_file_delay} {incr my_delay} { - if [file exists $file_name] { + if {[file exists $file_name] && [file exists $file_name]} { # Add small delay for I/O buffering - for {} {$my_delay <= $max_file_delay} {incr my_delay} { - if {[file size $file_name] != 0} { - break - } - exec $bin_sleep 1 - } exec $bin_sleep 2 return 0 } @@ -517,7 +513,6 @@ proc wait_for_step { step_id } { # Returns 1 if the system is enforcing associations, 0 otherwise # ################################################################ - proc test_assoc_enforced { } { global scontrol @@ -538,6 +533,66 @@ proc test_assoc_enforced { } { return $assoc_enforced } +################################################################ +# Proc: test_wiki_sched +# +# Return 1 if using sched/wiki or sched/wiki2 (Maui or Moab), +# 0 otherwise +################################################################ +proc test_wiki_sched { } { + global scontrol + + log_user 0 + set sched_wiki 0 + spawn $scontrol show config + expect { + -re "SchedulerType *= sched/wiki" { + set sched_wiki 1 + exp_continue + } + eof { + wait + } + } + log_user 1 + + return $sched_wiki +} + +################################################################ +# +# Proc: test_account_storage +# +# Purpose: Determine if we are using a usable accounting storage +# package. +# This is based upon +# the value of AccountingStorageType in the slurm.conf. 
+# +# Returns 1 if the system is running an accounting storage type +# that is complete, 0 otherwise +# +################################################################ + +proc test_account_storage { } { + global scontrol + + log_user 0 + set account_storage 0 + spawn $scontrol show config + expect { + -re "(accounting_storage/slurmdbd|accounting_storage/mysql)" { + set account_storage 1 + exp_continue + } + eof { + wait + } + } + log_user 1 + + return $account_storage +} + ################################################################ # # Proc: get_default_acct @@ -738,7 +793,7 @@ proc test_aix {} { ################################################################ proc test_super_user { } { - global alpha_numeric bin_id number scontrol super_user super_user_set + global alpha_numeric_under bin_id number scontrol super_user super_user_set if {$super_user_set != 0} { return $super_user @@ -772,7 +827,7 @@ proc test_super_user { } { spawn $bin_id -un set user "" expect { - -re "($alpha_numeric)" { + -re "($alpha_numeric_under)" { set user $expect_out(1,string) exp_continue } @@ -780,10 +835,10 @@ proc test_super_user { } { wait } } - spawn $scontrol show control + spawn $scontrol show config set slurm_user "" expect { - -re "SlurmUser *= ($alpha_numeric)" { + -re "SlurmUser *= ($alpha_numeric_under)" { set slurm_user $expect_out(1,string) exp_continue } @@ -816,6 +871,22 @@ proc dec2hex16 {value} { } } +# Create a 32 bit hex number from a signed decimal number +# Replace all non-decimal characters +# Courtesy of Chris Cornish +# http://aspn.activestate.com/ASPN/Cookbook/Tcl/Recipe/415982 +proc dec2hex {value} { + regsub -all {[^0-x\.-]} $value {} newtemp + set value [string trim $newtemp] + if {$value < 2147483647 && $value > -2147483648} { + set tempvalue [format "%#010X" [expr $value]] + return [string range $tempvalue 2 9] + } elseif {$value < -2147483647} { + return "80000000" + } else { + return "7FFFFFFF" + } +} 
################################################################ # @@ -938,6 +1009,25 @@ proc make_bash_script { script_name script_contents } { exec $bin_chmod 700 $script_name } +################################################################ +# Given a hostname, return it's numeric suffix +################################################################ +proc get_suffix { hostname } { + set host_len [string length $hostname] + for {set host_inx [expr $host_len-1]} {$host_inx >= 0} {incr host_inx -1} { + set host_char [string index $hostname $host_inx] + if {[string compare $host_char "0"] < 0} { break } + if {[string compare $host_char "9"] > 0} { break } + } + incr host_inx + + if {$host_inx == $host_len} { + send_user "\nHostname lacks a suffix:$hostname\n" + return "-1" + } + return [string range $hostname $host_inx $host_len] +} + ################################################################ # Check if we are user root or SlurmUser # @@ -945,13 +1035,13 @@ proc make_bash_script { script_name script_contents } { ################################################################ proc is_super_user { } { - global alpha_numeric bin_id scontrol + global alpha_numeric_under bin_id scontrol log_user 0 set user_name "nobody" spawn $bin_id -u -n expect { - -re "($alpha_numeric)" { + -re "($alpha_numeric_under)" { set user_name $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/regression b/testsuite/expect/regression index b59af8525..0c7c5d7c4 100755 --- a/testsuite/expect/regression +++ b/testsuite/expect/regression @@ -40,7 +40,7 @@ if [ ! -x ./regression ]; then exit 1 fi if [ ! -x ./globals ]; then - echo "FAILURE: copy 'globals.example' to 'globals' and modify as needed" + echo "FAILURE: 'globals' file not found" exit 1 fi @@ -51,7 +51,7 @@ fi # Run the tests in this directory /bin/date BEGIN_TIME=`date +%s` -for major in `seq 1 20`; do +for major in `seq 1 100`; do for minor in `seq 1 100`; do TEST=test${major}.${minor} if [ ! 
-f ./$TEST ]; then continue; fi diff --git a/testsuite/expect/test1.15 b/testsuite/expect/test1.15 index 4237f3afa..9cbbfd612 100755 --- a/testsuite/expect/test1.15 +++ b/testsuite/expect/test1.15 @@ -61,11 +61,11 @@ expect { incr matches exp_continue } - -re "task.0,2-9.: running" { # bracket escape not working + -re "task \\\[0,2-9\\\]: running" { incr matches exp_continue } - -re "exited with exit code" { + -re "Exited with exit code" { send_user "This error is expected, no worries\n" exp_continue } @@ -80,7 +80,7 @@ expect { } if {$matches != 2} { - send_user "\nFAILURE: problem with srun wait option\n" + send_user "\nFAILURE: problem with srun wait option matches was $matches\n" set exit_code 1 } if {$exit_code == 0} { diff --git a/testsuite/expect/test1.18 b/testsuite/expect/test1.18 index 38b442039..fdc0946e6 100755 --- a/testsuite/expect/test1.18 +++ b/testsuite/expect/test1.18 @@ -41,7 +41,7 @@ print_header $test_id # Run a job attempting to get some dummy license name # set match 0 -set srun_pid [spawn $srun --licenses=DUMMY_FOR_TESTING $bin_hostname] +set srun_pid [spawn $srun --licenses=DUMMY_FOR_TESTING $bin_printenv SLURMD_NODENAME] expect { -re "invalid license" { set match 1 @@ -80,7 +80,7 @@ expect { log_user 1 if {[string compare $licenses ""] != 0} { - set srun_pid [spawn $srun --licenses=$licenses $bin_hostname] + set srun_pid [spawn $srun --licenses=$licenses $bin_printenv SLURMD_NODENAME] expect { -re "invalid license" { send_user "\nFAILURE: Error getting license\n" diff --git a/testsuite/expect/test1.19 b/testsuite/expect/test1.19 index b4e33b966..0cef6672e 100755 --- a/testsuite/expect/test1.19 +++ b/testsuite/expect/test1.19 @@ -136,7 +136,7 @@ if {[wait_for_file $file_err_j_glob] == 0} { # file name and confirm it is created # set job_id 0 -set srun_pid [spawn $srun --output=$file_out_J -N1 -v -t1 $bin_hostname] +set srun_pid [spawn $srun --output=$file_out_J -N1 -v -t1 $bin_printenv SLURMD_NODENAME] expect { -re "jobid ($number).*" { set 
job_id $expect_out(1,string) @@ -173,7 +173,7 @@ set file_out_n_glob "test$test_id.n.$node_id.output" exec $bin_rm -f $file_out_n_glob set job_id 0 -set srun_pid [spawn $srun --output=$file_out_n -N1 -n2 -O -v -t1 $bin_hostname] +set srun_pid [spawn $srun --output=$file_out_n -N1 -n2 -O -v -t1 $bin_printenv SLURMD_NODENAME] expect { -re "jobid ($number).*" { set job_id $expect_out(1,string) @@ -219,8 +219,8 @@ if {$file_cnt != 1} { # exec $bin_rm -f $file_in make_bash_script $file_in " - $srun -n4 -O --output=$file_out_s $bin_hostname - $srun -n4 -O --output=$file_out_s $bin_hostname + $srun -n4 -O --output=$file_out_s $bin_printenv SLURMD_NODENAME + $srun -n4 -O --output=$file_out_s $bin_printenv SLURMD_NODENAME " for {set step_id 0} {$step_id < 4} {incr step_id} { diff --git a/testsuite/expect/test1.22 b/testsuite/expect/test1.22 index 69f73ee1b..61ec64965 100755 --- a/testsuite/expect/test1.22 +++ b/testsuite/expect/test1.22 @@ -53,9 +53,9 @@ if { [test_bluegene] } { } } -set srun_pid [spawn $srun -N$node_cnt -l --threads=1 -t1 $bin_hostname] +set srun_pid [spawn $srun -N$node_cnt -l --threads=1 -t1 $bin_printenv SLURMD_NODENAME] expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } @@ -82,13 +82,13 @@ if {[string compare $host_0 ""] == 0} { # set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N$node_cnt -n32 -O -l --threads=32 -t1 $bin_hostname] +set srun_pid [spawn $srun -N$node_cnt -n32 -O -l --threads=32 -t1 $bin_printenv SLURMD_NODENAME] expect { -re "Memory required by task is not available" { send_user "\nWARNING: DefMemPerTask is configured too high for this test\n" set host_0 "warn" } - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23 index 2eb71e58c..121c2766f 100755 --- a/testsuite/expect/test1.23 +++ b/testsuite/expect/test1.23 @@ -38,20 
+38,26 @@ set exit_code 0 print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Submit a job with invalid mincpus requirement # set err_msg 0 set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --mincpus=999999 -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --mincpus=999999 -t1 $bin_printenv SLURMD_NODENAME] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" set err_msg 1 exp_continue } - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } @@ -79,9 +85,9 @@ if {$err_msg != 1} { # set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --mincpus=1 -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --mincpus=1 -t1 $bin_printenv SLURMD_NODENAME] expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } @@ -106,14 +112,14 @@ if {[string compare $host_0 ""] == 0} { set err_msg 0 set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_printenv SLURMD_NODENAME] expect { -re "not available" { send_user "This error is expected, no worries\n" set err_msg 1 exp_continue } - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } @@ -141,9 +147,9 @@ if {$err_msg != 1} { # set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --mem=1 -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --mem=1 -t1 $bin_printenv SLURMD_NODENAME] expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } @@ -168,14 +174,14 @@ if {[string compare $host_0 ""] == 0} { set err_msg 0 set host_0 "" set 
timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --tmp=999999999 -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --tmp=999999999 -t1 $bin_printenv SLURMD_NODENAME] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" set err_msg 1 exp_continue } - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } @@ -203,9 +209,9 @@ if {$err_msg != 1} { # set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --tmp=0 -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --tmp=0 -t1 $bin_printenv SLURMD_NODENAME] expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test1.24 b/testsuite/expect/test1.24 index 096b698a2..cd49bd22a 100755 --- a/testsuite/expect/test1.24 +++ b/testsuite/expect/test1.24 @@ -37,20 +37,26 @@ set exit_code 0 print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Submit a job with invalid constraint requirement # set err_msg 0 set host_0 "" set timeout $max_job_delay -set srun_pid [spawn $srun -N1 -l --constraint=invalid,constraint -t1 $bin_hostname] +set srun_pid [spawn $srun -N1 -l --constraint=invalid,constraint -t1 $bin_printenv SLURMD_NODENAME] expect { -re "configuration is not available" { send_user "This error is expected, no worries\n" set err_msg 1 exp_continue } - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { set host_0 $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test1.27 b/testsuite/expect/test1.27 index 636aa59fe..976919b8d 100755 --- a/testsuite/expect/test1.27 +++ b/testsuite/expect/test1.27 @@ -65,7 +65,7 @@ array set good_vars { set timeout $max_job_delay set srun_pid [spawn $srun -N1 -n1 -t1 $bin_env] expect { - -re 
"(SLURM_$alpha_under)=($alpha_numeric)" { + -re "(SLURM_$alpha_under)=($alpha_numeric_under)" { set found_vars($expect_out(1,string)) "$expect_out(2,string)" exp_continue } diff --git a/testsuite/expect/test1.29 b/testsuite/expect/test1.29 index c055e47dc..7da4c0dd0 100755 --- a/testsuite/expect/test1.29 +++ b/testsuite/expect/test1.29 @@ -61,7 +61,7 @@ if {[test_front_end] != 0 && [test_super_user] == 0} { # of the ulimit program is inconsistent across systems. # exec $bin_rm -f $file_prog_get $file_err $file_in $file_out -exec $bin_make -f /dev/null $file_prog_get +exec $bin_cc -O ${file_prog_get}.c -o $file_prog_get exec $bin_chmod 700 $file_prog_get # diff --git a/testsuite/expect/test1.30 b/testsuite/expect/test1.30 index c49166785..7bfc6c32d 100755 --- a/testsuite/expect/test1.30 +++ b/testsuite/expect/test1.30 @@ -55,7 +55,7 @@ if { [test_bluegene] } { # set timeout $max_job_delay for {set node_cnt 1} {$node_cnt > 0} {set node_cnt [expr $node_cnt * 2]} { - set srun_pid [spawn $srun --immediate -N$node_cnt $srun_opts -t2 $bin_hostname] + set srun_pid [spawn $srun --immediate -N$node_cnt $srun_opts -t2 $bin_printenv SLURMD_NODENAME] expect { -re "Immediate execution impossible" { send_user "This error is expected, no worries\n" diff --git a/testsuite/expect/test1.32 b/testsuite/expect/test1.32 index 9275a8bee..316173bed 100755 --- a/testsuite/expect/test1.32 +++ b/testsuite/expect/test1.32 @@ -54,7 +54,7 @@ if { [test_xcpu] } { # Delete left-over program and rebuild it # exec $bin_rm -f $file_prog -exec $bin_make -f /dev/null $file_prog +exec $bin_cc -O ${file_prog}.c -o $file_prog exec $bin_chmod 700 $file_prog # @@ -81,12 +81,11 @@ expect { # set timeout $max_job_delay set srun_pid [spawn $srun -N1 -t1 --unbuffered $file_prog] -exec $bin_sleep 5 -exec $bin_kill -USR1 $srun_pid -exec $bin_kill -USR2 $srun_pid expect { -re "WAITING" { incr matches + exec $bin_kill -USR1 $srun_pid + exec $bin_kill -USR2 $srun_pid exp_continue } -re "SIGUSR($number)" { diff 
--git a/testsuite/expect/test1.34 b/testsuite/expect/test1.34 new file mode 100755 index 000000000..87485a9de --- /dev/null +++ b/testsuite/expect/test1.34 @@ -0,0 +1,87 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of task signal abort message +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "1.34" +set exit_code 0 +set test_prog "test$test_id.prog" + +print_header $test_id + +if {[test_front_end] != 0} { + send_user "\nWARNING: This test is incompatable with front-end systems\n" + exit 0 +} + +# +# Delete left-over program and rebuild it +# +file delete $test_prog + +exec $bin_make -f /dev/null $test_prog +exec $bin_chmod 700 $test_prog + +# +# Spawn program and check for task exit messages from srun +# +set matches 0 +set timeout $max_job_delay +set srun_pid [spawn $srun -n1 -O $test_prog] +expect { + -re "Segmentation fault" { + send_user "\nThis error was expected, no worries\n" + set matches 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + set exit_code 1 + } + eof { + wait + } +} +if {$matches != 1} { + send_user "\nFAILURE: srun failed to report exit code\n" + set exit_code 1 +} + +# +# Post-processing +# +if {$exit_code == 0} { + exec $bin_rm -f $test_prog + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test1.34.prog.c b/testsuite/expect/test1.34.prog.c new file mode 100644 index 000000000..d51a6c56e --- /dev/null +++ b/testsuite/expect/test1.34.prog.c @@ -0,0 +1,33 @@ +/*****************************************************************************\ + * test1.34.prog.c - Sleep for a second and terminate with SIGSEGV + ***************************************************************************** + * Copyright (C) 2008 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Morris Jette <jette1@llnl.gov> + * LLNL-CODE-402394. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ +int main (int argc, char *argv[]) +{ + char *buffer; + + buffer = (char *) 0; + buffer[1] = 'a'; + return; +} diff --git a/testsuite/expect/test1.38 b/testsuite/expect/test1.38 index fd14cf077..3501fbeaf 100755 --- a/testsuite/expect/test1.38 +++ b/testsuite/expect/test1.38 @@ -73,26 +73,30 @@ expect { # exec $bin_pkill -INT -n -u $uid srun # set timeout $max_job_delay -set matches 0 +set match_run 0 +set match_term 0 +set match_wait 0 set job_id 0 set srun_pid [spawn $srun -v -N1 -t1 --unbuffered $file_in] expect { - -re "srun: task0: running" { - incr matches - cancel_job $job_id - exp_continue - } -re "launching ($number).0" { set job_id $expect_out(1,string) exp_continue } + -re "srun: task 0: running" { + set match_run 1 + cancel_job $job_id + exp_continue + } -re "WAITING" { - incr matches + set match_wait 1 + # make sure the task gets started + sleep 1 exec $bin_kill -INT $srun_pid exp_continue } - -re "Force Terminated" { - incr matches + -re "Force Terminated job" { + set match_term 1 exp_continue } timeout { @@ -104,8 +108,8 @@ expect { wait } } -if {$matches != 3} { - send_user "\nFAILURE: srun failed to properly process SIGINT\n" +if {[expr $match_run + $match_wait + 
$match_term] != 3} { + send_user "\nFAILURE: srun failed to properly process SIGINT matches was $matches\n" set exit_code 1 } @@ -114,7 +118,9 @@ if {$matches != 3} { # Note: For systems supporting proper pthreads, instead use # exec $bin_kill -INT $srun_pid # -set matches 0 +set match_run 0 +set match_term 0 +set match_wait 0 set job_id 0 set srun_pid [spawn $srun -v -N1 -t1 --unbuffered --quit-on-interrupt $file_in] expect { @@ -123,20 +129,20 @@ expect { exp_continue } -re "WAITING" { - incr matches + set match_wait 1 exec $bin_kill -INT $srun_pid exp_continue } -re "srun: interrupt" { - set matches = -999 + set match_run 999 exp_continue } -re "srun: task0: running" { - set matches = -999 + set match_run 999 exp_continue } -re "forcing job termination" { - incr matches + set match_term 1 exp_continue } timeout { @@ -149,7 +155,7 @@ expect { } } cancel_job $job_id -if {$matches != 2} { +if {[expr $match_run + $match_wait + $match_term] != 2} { send_user "\nFAILURE: srun failed to properly process SIGINT\n" set exit_code 1 } diff --git a/testsuite/expect/test1.43 b/testsuite/expect/test1.43 index c88b1d9ea..cf6834a27 100755 --- a/testsuite/expect/test1.43 +++ b/testsuite/expect/test1.43 @@ -41,7 +41,7 @@ print_header $test_id # set timeout 60 for {set node_cnt 1} {$node_cnt > 0} {set node_cnt [expr $node_cnt * 2]} { - set srun_pid [spawn $srun --test-only -N$node_cnt -t1 $bin_hostname] + set srun_pid [spawn $srun --test-only -N$node_cnt -t1 $bin_printenv SLURMD_NODENAME] expect { -re "allocation success" { exp_continue diff --git a/testsuite/expect/test1.44 b/testsuite/expect/test1.44 index e66c46909..993f1aa9a 100755 --- a/testsuite/expect/test1.44 +++ b/testsuite/expect/test1.44 @@ -8,7 +8,8 @@ # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. 
############################################################################ -# Copyright (C) 2005 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Chris Morrone <morrone2@llnl.gov> # LLNL-CODE-402394. @@ -97,7 +98,11 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { set stdout_lines [get_line_cnt $file_out] } if {$stdout_lines != $stdout_target} { - send_user "\nFAILURE: stdout is incomplete\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set exit_code 1 break } else { diff --git a/testsuite/expect/test1.46 b/testsuite/expect/test1.46 index 57abc964a..0938ed86b 100755 --- a/testsuite/expect/test1.46 +++ b/testsuite/expect/test1.46 @@ -66,7 +66,7 @@ main() close $fd # Add delay due to sporatic error "Clock skew detected" exec $bin_sleep 1 -exec $bin_make -f /dev/null $file_in +exec $bin_cc -O -o $file_in ${file_in}.c exec $bin_chmod 700 $file_in # diff --git a/testsuite/expect/test1.51 b/testsuite/expect/test1.51 index 34528e12e..45ae78925 100755 --- a/testsuite/expect/test1.51 +++ b/testsuite/expect/test1.51 @@ -58,6 +58,7 @@ make_bash_script $file_script " # Set umask and confirm it is propagated # set matches 0 +set timeout $max_job_delay set srun_pid [spawn ./$file_script] expect { -re (0123|123) { diff --git a/testsuite/expect/test1.52 b/testsuite/expect/test1.52 index b488a13df..74e47c00c 100755 --- a/testsuite/expect/test1.52 +++ b/testsuite/expect/test1.52 @@ -81,7 +81,7 @@ set node0 0 set node1 0 set node2 0 set no_hostfile 0 - +set timeout $max_job_delay for {set i 0} {$i<3} {incr i} { if { $i==1 } { @@ -123,7 +123,7 @@ for {set i 0} {$i<3} {incr i} { set no_hostfile 1 exp_continue } - -re 
"($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set task_id $expect_out(1,string) if {$task_id == 0} { set node0 $expect_out(2,string) diff --git a/testsuite/expect/test1.81 b/testsuite/expect/test1.81 index 662bbee15..14541174b 100755 --- a/testsuite/expect/test1.81 +++ b/testsuite/expect/test1.81 @@ -121,7 +121,7 @@ set host_1 "" set srun_pid [spawn $srun -N1-1 -l -t1 $bin_printenv SLURM_NODEID] expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { if {$expect_out(1,string) == 0} { set host_0 $expect_out(2,string) } @@ -165,7 +165,7 @@ set host_3 "" set timeout $max_job_delay set srun_pid [spawn $srun -N1-3 -l -t1 $bin_printenv SLURM_NODEID] expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { if {$expect_out(1,string) == 0} { set host_0 $expect_out(2,string) } @@ -230,7 +230,7 @@ expect { send_user "\nWARNING: can't test srun task distribution\n" exit $exit_code } - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { if {$expect_out(1,string) == 0} { set host_0 $expect_out(2,string) } diff --git a/testsuite/expect/test1.82 b/testsuite/expect/test1.82 index 6c56e40c5..cf8004058 100755 --- a/testsuite/expect/test1.82 +++ b/testsuite/expect/test1.82 @@ -76,7 +76,7 @@ expect { slow_kill $srun_pid exit 0 } - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { if {$expect_out(1,string) == 0} { set host_0 $expect_out(2,string) } @@ -156,7 +156,7 @@ expect { exit 0 } - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { if {$expect_out(1,string) == 0} { set host_0 $expect_out(2,string) } @@ -213,7 +213,7 @@ set host_1 "" set timeout $max_job_delay set srun_pid [spawn $srun -N1 -l --nodelist=$include_node -t1 $bin_printenv SLURMD_NODENAME] expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { if {$expect_out(1,string) == 0} { set host_0 
$expect_out(2,string) } diff --git a/testsuite/expect/test1.83 b/testsuite/expect/test1.83 index 4e20b2509..3a99bf56a 100755 --- a/testsuite/expect/test1.83 +++ b/testsuite/expect/test1.83 @@ -43,6 +43,11 @@ set exit_code 0 print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatable with front-end systems\n" exit 0 @@ -75,22 +80,6 @@ expect { send_user "\nWARNING: can't test srun task distribution\n" exit $exit_code } - -re "($number): ($alpha_numeric_under)($number)" { - set task_id $expect_out(1,string) - if {$task_id == 0} { - set host_0_name $expect_out(2,string) - set host_0_num $expect_out(3,string) - } - if {$task_id == 1} { - set host_1_name $expect_out(2,string) - set host_1_num $expect_out(3,string) - } - if {$task_id == 2} { - set host_2_name $expect_out(2,string) - set host_2_num $expect_out(3,string) - } - exp_continue - } -re "($number): ($alpha_numeric_under)" { set task_id $expect_out(1,string) if {$task_id == 0} { @@ -127,20 +116,24 @@ if {[test_front_end] != 0} { if {[string compare $host_0_name ""] == 0} { send_user "\nFAILURE: Did not get hostname of task 0\n" - set exit_code 1 + exit 1 } if {[string compare $host_1_name ""] == 0} { send_user "\nFAILURE: Did not get hostname of task 1\n" - set exit_code 1 + exit 1 } if {[string compare $host_2_name ""] == 0} { send_user "\nFAILURE: Did not get hostname of task 2\n" - set exit_code 1 + exit 1 } # # Verify node sequence number # +set host_0_num [get_suffix $host_0_name] +set host_1_num [get_suffix $host_1_name] +set host_2_num [get_suffix $host_2_name] + if {$host_1_num != [expr $host_0_num + 1]} { send_user "\nFAILURE: node sequence number not contiguous\n" set exit_code 1 diff --git a/testsuite/expect/test1.84 b/testsuite/expect/test1.84 index e23d88f01..6085e12d0 100755 --- 
a/testsuite/expect/test1.84 +++ b/testsuite/expect/test1.84 @@ -54,7 +54,7 @@ if {[test_multiple_slurmd] != 0} { make_bash_script $file_in " env | grep SLURM_CPUS_ON_NODE - $bin_hostname" + $bin_printenv SLURMD_NODENAME" # # Submit a 1 node job to determine the node's CPU count @@ -77,7 +77,7 @@ expect { incr task_cnt exp_continue } - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set task $expect_out(1,string) set host $expect_out(2,string) exp_continue @@ -123,7 +123,7 @@ if {$cpu_cnt < 2} { # Submit a 1 node job to determine the node's CPU count # set task_cnt 0 -set srun_pid [spawn $srun -N1 -t1 --nodelist=$host --cpus-per-task=2 -l $bin_hostname] +set srun_pid [spawn $srun -N1 -t1 --nodelist=$host --cpus-per-task=2 -l $bin_printenv SLURMD_NODENAME] expect { -re "Invalid node name specified" { send_user "\nWARNING: Appears you are using " @@ -131,7 +131,7 @@ expect { send_user "This test won't work in that fashion.\n" exit 0 } - -re "$number: ($alpha_numeric)" { + -re "$number: ($alpha_numeric_under)" { incr task_cnt exp_continue } diff --git a/testsuite/expect/test1.86 b/testsuite/expect/test1.86 index fd6dc7043..44163c7aa 100755 --- a/testsuite/expect/test1.86 +++ b/testsuite/expect/test1.86 @@ -59,11 +59,18 @@ make_bash_script $file_in " $bin_bash --norc " +# +# Expect seems to have trouble parsing with this test due to a bug in Expect. 
+# It overwrites the stdout parsed with stdin to the commands +# Change PATH to shorten the strings, which seems to help +# +set env(PATH) "${slurm_dir}/bin:$env(PATH)" + # # Submit a 2 node job # set timeout $max_job_delay -set salloc_pid [spawn $salloc -N2 -t1 ./$file_in] +set salloc_pid [spawn salloc -N2 -t1 ./$file_in] expect { -re "Node count specification invalid" { send_user "\nWARNING: can't test srun task distribution\n" @@ -78,13 +85,13 @@ expect { send_user "Job initiated\n" } timeout { - send_user "\nFAILURE: srun not responding\n" + send_user "\nFAILURE: salloc not responding\n" slow_kill $salloc_pid exit 1 } eof { wait - send_user "\nFAILURE: srun terminated\n" + send_user "\nFAILURE: salloc terminated\n" exit 1 } } @@ -95,9 +102,9 @@ exec $bin_rm -f $file_in # set host_0 "" set host_1 "" -send "$srun -l $bin_hostname\n" +send "srun -l $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set host_inx $expect_out(1,string) if {$host_inx == 0} { set host_0 $expect_out(2,string) @@ -144,9 +151,9 @@ if {$exit_code != 0} { # Exclude specific node # set matches 0 -send "$srun -l -N1 -n1 --exclude=$host_0 $bin_hostname\n" +send "srun -l -N1 -n1 --exclude=$host_0 $bin_printenv SLURMD_NODENAME\n" expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { if {[string compare $expect_out(1,string) $host_1] == 0} { incr matches } else { @@ -181,9 +188,9 @@ if {$matches == 0} { # Exclude specific node # set matches 0 -send "$srun -l -N1 -n1 --exclude=$host_1 $bin_hostname\n" +send "srun -l -N1 -n1 --exclude=$host_1 $bin_printenv SLURMD_NODENAME\n" expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { if {[string compare $expect_out(1,string) $host_0] == 0} { incr matches } else { @@ -218,7 +225,7 @@ if {$matches == 0} { # Include specific node # set matches 0 -send "$srun -l -N1 -n1 --nodelist=$host_0 $bin_hostname\n" +send "srun -l -N1 -n1 
--nodelist=$host_0 $bin_printenv SLURMD_NODENAME\n" expect { -re "Requested node configuration is not available" { send_user "\nWARNING: Appears you are using " @@ -227,7 +234,7 @@ expect { send "exit\n" exit 0 } - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { if {[string compare $expect_out(1,string) $host_0] == 0} { incr matches } else { @@ -262,9 +269,9 @@ if {$matches == 0} { # Include specific node # set matches 0 -send "$srun -l -N1 -n1 --nodelist=$host_1 $bin_hostname\n" +send "srun -l -N1 -n1 --nodelist=$host_1 $bin_printenv SLURMD_NODENAME\n" expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { if {[string compare $expect_out(1,string) $host_1] == 0} { incr matches } else { @@ -301,10 +308,10 @@ if {$matches == 0} { # lines, so we use two send commands for this execute line # set matches 0 -send "$srun -l -N1 -n1 --nodelist=$host_0 " -send " --exclude=$host_0 $bin_hostname\n" +send "srun -l -N1 -n1 --nodelist=$host_0 " +send " --exclude=$host_0 $bin_printenv SLURMD_NODENAME\n" expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { send_user "\nFAILURE: wrong node responded\n" set exit_code 1 exp_continue @@ -336,9 +343,9 @@ if {$matches == 0} { # Error test: Exceed node count # set matches 0 -send "$srun -l -N3 -n3 -O $bin_hostname\n" +send "srun -l -N3 -n3 -O $bin_printenv SLURMD_NODENAME\n" expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { send_user "\nFAILURE: wrong node responded\n" set exit_code 1 exp_continue @@ -371,9 +378,9 @@ if {$matches == 0} { # set test_0 "" set test_1 "" -send "$srun -l -N1-1 -n2 -O $bin_hostname\n" +send "srun -l -N1-1 -n2 -O $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set host_inx $expect_out(1,string) if {$host_inx == 0} { set test_0 $expect_out(2,string) @@ -409,9 +416,9 @@ if {[string compare $test_0 $test_1] != 0} { # Error test: Exceed task count, first get 
the processor count then exceed it # set processors 1 -send "$srun -l -c1 $bin_hostname\n" +send "srun -l -c1 $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { incr processors exp_continue } @@ -433,9 +440,9 @@ expect { } } set matches 0 -send "$srun -l -n $processors $bin_hostname\n" +send "srun -l -n $processors $bin_printenv SLURMD_NODENAME\n" expect { - -re "0: ($alpha_numeric)" { + -re "0: ($alpha_numeric_under)" { send_user "\nFAILURE: wrong node responded\n" set exit_code 1 exp_continue diff --git a/testsuite/expect/test1.87 b/testsuite/expect/test1.87 index b2ce6cca5..94b7afa30 100755 --- a/testsuite/expect/test1.87 +++ b/testsuite/expect/test1.87 @@ -104,7 +104,7 @@ set host_2 "" set host_3 "" send "$srun -l $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set host_num $expect_out(1,string) if { $host_num == 0 } { set host_0 $expect_out(2,string) @@ -169,7 +169,7 @@ set matches 0 send "$srun -l -N2 -n2 --relative=0 $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set host_num $expect_out(1,string) if { $host_num == 0 } { if {[string compare $expect_out(2,string) $host_0] == 0} { @@ -221,7 +221,7 @@ if {$matches < 2} { set matches 0 send "$srun -l -N2 -n2 --relative=2 $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set host_num $expect_out(1,string) if { $host_num == 0 } { if {[string compare $expect_out(2,string) $host_2] == 0} { @@ -279,7 +279,7 @@ expect { incr error exp_continue } - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { send_user "\nFAILURE: running where we shouldn't be able to run\n" set exit_code 1 exp_continue diff --git a/testsuite/expect/test1.89 b/testsuite/expect/test1.89 index 7c561700a..f7e703f6c 100755 --- 
a/testsuite/expect/test1.89 +++ b/testsuite/expect/test1.89 @@ -217,7 +217,7 @@ set cpu_cnt 0 while {$cpu_cnt < $task_cnt} { set mask_sum 0 set mask [ expr 1 << $cpu_cnt ] - set mstr [ dec2hex16 $mask] + set mstr [ dec2hex $mask] send "$srun -c1 --cpu_bind=mask_cpu:$mstr $file_prog\n" expect { -re "TASK_ID:($number),MASK:($number)" { @@ -258,7 +258,7 @@ set full_mask [ expr (1 << $task_cnt) - 1 ] while {$cpu_cnt < $task_cnt} { set mask_sum 0 set mask [ expr 1 << $cpu_cnt ] - set mstr [ dec2hex16 $mask] + set mstr [ dec2hex $mask] set fwd_mask "$fwd_mask,$mstr" set fwd_map "$fwd_map,$cpu_cnt" set rev_mask "$mstr,$rev_mask" diff --git a/testsuite/expect/test1.90 b/testsuite/expect/test1.90 index e7b6538e4..2b8f25897 100755 --- a/testsuite/expect/test1.90 +++ b/testsuite/expect/test1.90 @@ -95,13 +95,12 @@ set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash] # Run a job step to get allocated processor count and affinity # expect -re $prompt -set task_cnt 0 -set full_mask 0 +set full_mask -1 +set timeout $max_job_delay send "$srun -c1 $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { - set full_mask $expect_out(2,string) - incr task_cnt + set full_mask $expect_out(3,string) exp_continue } -re "error" { @@ -117,12 +116,37 @@ expect { -re $prompt } +# +# We probably bind to socket memory, so get that from the MEM_MASK +# and use that number of tasks +# +if {$full_mask == 1} { + set task_cnt 1 +} elseif {$full_mask == 3} { + set task_cnt 2 +} elseif {$full_mask == 7} { + set task_cnt 3 +} elseif {$full_mask == 15} { + set task_cnt 4 +} elseif {$full_mask == 31} { + set task_cnt 5 +} elseif {$full_mask == 63} { + set task_cnt 6 +} elseif {$full_mask == 127} { + set task_cnt 7 +} elseif {$full_mask == 255} { + set task_cnt 8 +} else { + send_user "\nFAILURE: Unable to get memory mask\n" + exit 1 +} + # # Run a job step with memory affinity # set cpu_mask 0 set mem_mask 0 -send "$srun -c1 --mem_bind=rank $file_prog\n" 
+send "$srun -n $task_cnt --mem_bind=rank $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr cpu_mask $expect_out(2,string) @@ -151,7 +175,7 @@ if {$mem_mask != $full_mask} { # set task_mask 0 set verbose_cnt 0 -send "$srun -c1 --mem_bind=verbose,map_mem:0 $file_prog\n" +send "$srun -n $task_cnt --mem_bind=verbose,map_mem:0 $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) @@ -174,7 +198,7 @@ if {$task_mask != $task_cnt} { set exit_code 1 } set verbose_cnt 0 -send "$srun -c1 --mem_bind=verbose,map_mem:0 $file_prog\n" +send "$srun -n $task_cnt --mem_bind=verbose,map_mem:0 $file_prog\n" expect { -re "mem_bind=MAP" { incr verbose_cnt @@ -200,7 +224,7 @@ if {$verbose_cnt != $task_cnt} { # # Run all tasks all bound to the same CPU's memory (local CPU) # -send "$srun -c1 --cpu_bind=rank --mem_bind=local $file_prog\n" +send "$srun -n $task_cnt --cpu_bind=rank --mem_bind=local $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { if {$expect_out(2,string) != $expect_out(3,string)} { @@ -230,7 +254,7 @@ set cpu_cnt 0 while {$cpu_cnt < $task_cnt} { set mask_sum 0 set mask [ expr 1 << $cpu_cnt ] - send "$srun -c1 --mem_bind=map_mem:$cpu_cnt $file_prog\n" + send "$srun -n $task_cnt --mem_bind=map_mem:$cpu_cnt $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr mask_sum $expect_out(3,string) @@ -263,8 +287,8 @@ set cpu_cnt 0 while {$cpu_cnt < $task_cnt} { set mask_sum 0 set mask [ expr 1 << $cpu_cnt ] - set mstr [ dec2hex16 $mask] - send "$srun -c1 --mem_bind=mask_mem:$mstr $file_prog\n" + set mstr [ dec2hex $mask] + send "$srun -n $task_cnt --mem_bind=mask_mem:$mstr $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr mask_sum $expect_out(3,string) @@ -304,7 +328,7 @@ set full_mask [ expr (1 << $task_cnt) - 1 ] while {$cpu_cnt < $task_cnt} { set mask_sum 
0 set mask [ expr 1 << $cpu_cnt ] - set mstr [ dec2hex16 $mask] + set mstr [ dec2hex $mask] set fwd_mask "$fwd_mask,$mstr" set fwd_map "$fwd_map,$cpu_cnt" set rev_mask "$mstr,$rev_mask" @@ -340,7 +364,7 @@ send_user "alt_mask: $alt_mask\n" # Run all tasks bound to a different CPU's memory by specifying a forward map # set task_mask 0 -send "$srun -c1 --mem_bind=map_mem:$fwd_map $file_prog\n" +send "$srun -n $task_cnt --mem_bind=map_mem:$fwd_map $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) @@ -368,7 +392,7 @@ if {$task_mask != $full_mask} { # Run all tasks bound to a different CPU's memory by specifying a reverse map # set task_mask 0 -send "$srun -c1 --mem_bind=map_mem:$rev_map $file_prog\n" +send "$srun -n $task_cnt --mem_bind=map_mem:$rev_map $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) @@ -396,7 +420,7 @@ if {$task_mask != $full_mask} { # Run all tasks bound to a different CPU's memroy by specifying an alternating map # set task_mask 0 -send "$srun -c1 --mem_bind=map_mem:$alt_map $file_prog\n" +send "$srun -n $task_cnt --mem_bind=map_mem:$alt_map $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) @@ -424,7 +448,7 @@ if {$task_mask != $full_mask} { # Run all tasks bound to a different CPU's memory by specifying a forward mask # set task_mask 0 -send "$srun -c1 --mem_bind=mask_mem:$fwd_mask $file_prog\n" +send "$srun -n $task_cnt --mem_bind=mask_mem:$fwd_mask $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) @@ -452,7 +476,7 @@ if {$task_mask != $full_mask} { # Run all tasks bound to a different CPU's memory by specifying a reverse mask # set task_mask 0 -send "$srun -c1 --mem_bind=mask_mem:$rev_mask $file_prog\n" +send "$srun -n $task_cnt 
--mem_bind=mask_mem:$rev_mask $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) @@ -480,7 +504,7 @@ if {$task_mask != $full_mask} { # Run all tasks bound to a different CPU's memory by specifying an alternating mask # set task_mask 0 -send "$srun -c1 --mem_bind=mask_mem:$alt_mask $file_prog\n" +send "$srun -n $task_cnt --mem_bind=mask_mem:$alt_mask $file_prog\n" expect { -re "TASK_ID:($number),CPU_MASK:($number),MEM_MASK:($number)" { incr task_mask $expect_out(3,string) diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91 index 256b0d4ba..01931d6fa 100755 --- a/testsuite/expect/test1.91 +++ b/testsuite/expect/test1.91 @@ -65,7 +65,7 @@ send_user "\ntask affinity plugin installed\n" # Identify a usable node set timeout $max_job_delay set node_name "" -set srun_pid [spawn $srun -N1 --exclusive --verbose $bin_hostname] +set srun_pid [spawn $srun -N1 --exclusive --verbose $bin_printenv SLURMD_NODENAME] expect { -re "on host ($alpha_numeric_under)" { set node_name $expect_out(1,string) diff --git a/testsuite/expect/test1.92 b/testsuite/expect/test1.92 index 0938a54f0..c23d95e5b 100755 --- a/testsuite/expect/test1.92 +++ b/testsuite/expect/test1.92 @@ -286,6 +286,9 @@ expect { if {$exit_code == 0} { send_user "\nSUCCESS\n" +} else { + send_user "\nNOTE: This test can fail if the node configuration in slurm.conf \n" + send_user " (sockets, cores, threads) differs from the actual configuration\n" } file delete $file_bash exit $exit_code diff --git a/testsuite/expect/test1.93 b/testsuite/expect/test1.93 index da5803286..1cd34b619 100755 --- a/testsuite/expect/test1.93 +++ b/testsuite/expect/test1.93 @@ -88,9 +88,9 @@ exec $bin_rm -f $file_in # set host_0 "" set host_1 "" -send "$srun -l --mpi=lam $bin_hostname\n" +send "$srun -l --mpi=lam $bin_printenv SLURMD_NODENAME\n" expect { - -re "($number): ($alpha_numeric)" { + -re "($number): ($alpha_numeric_under)" { set host_inx 
$expect_out(1,string) if {$host_inx == 0} { set host_0 $expect_out(2,string) diff --git a/testsuite/expect/test10.1 b/testsuite/expect/test10.1 index cabb50915..aacd65a05 100755 --- a/testsuite/expect/test10.1 +++ b/testsuite/expect/test10.1 @@ -37,10 +37,14 @@ set matches 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Report the smap usage format # - spawn $smap --usage expect { -re "Usage: .*seconds.*" { diff --git a/testsuite/expect/test10.10 b/testsuite/expect/test10.10 index 6124c5ed8..76fb36527 100755 --- a/testsuite/expect/test10.10 +++ b/testsuite/expect/test10.10 @@ -35,14 +35,19 @@ source ./globals set test_id "10.10" set exit_code 0 set matches 0 -set stuff "" +set stuff "" + print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the the --noheader option in smap # in curses format. # - set timeout 10 spawn $smap --noheader expect { diff --git a/testsuite/expect/test10.11 b/testsuite/expect/test10.11 index 290638010..4bab9f667 100755 --- a/testsuite/expect/test10.11 +++ b/testsuite/expect/test10.11 @@ -37,6 +37,11 @@ set version 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Report the smap version number # diff --git a/testsuite/expect/test10.12 b/testsuite/expect/test10.12 index 09dabf14b..5063f5aec 100755 --- a/testsuite/expect/test10.12 +++ b/testsuite/expect/test10.12 @@ -42,6 +42,10 @@ if { [test_bluegene] == 0 } { send_user "\nWARNING: This test is only compatable with bluegene systems\n" exit 0 } +if {[file exists $smap] == 0} { + send_user "\nFAILURE: smap not installed\n" + exit 1 +} # # Test smap resolve ablilty diff --git a/testsuite/expect/test10.13 b/testsuite/expect/test10.13 index 4f3a5d38a..db8fe4a0c 100755 --- a/testsuite/expect/test10.13 +++ b/testsuite/expect/test10.13 @@ -43,11 +43,16 @@ set stuff "" set too_small 0 
print_header $test_id + +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap creation of bluegene.conf file # in curses format. # - spawn $smap -Dc expect { -re "Screen is too small" { diff --git a/testsuite/expect/test10.2 b/testsuite/expect/test10.2 index 85106edd7..edeb5035e 100755 --- a/testsuite/expect/test10.2 +++ b/testsuite/expect/test10.2 @@ -37,10 +37,14 @@ set matches 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Report the smap help message # - spawn $smap --help expect { -re "Usage:" { diff --git a/testsuite/expect/test10.3 b/testsuite/expect/test10.3 index fe21a8dc4..87a2c5a19 100755 --- a/testsuite/expect/test10.3 +++ b/testsuite/expect/test10.3 @@ -39,11 +39,15 @@ set too_small 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap format of header for displaying slurm partitions # in curses format. # - spawn $smap -Ds expect { -re "Screen is too small" { diff --git a/testsuite/expect/test10.4 b/testsuite/expect/test10.4 index fbf8b1990..dd6cb8ce8 100755 --- a/testsuite/expect/test10.4 +++ b/testsuite/expect/test10.4 @@ -38,11 +38,15 @@ set matches 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap format of header for displaying slurm partitions # in commandline format. # - spawn $smap -Ds -c expect { -re "PARTITION" { diff --git a/testsuite/expect/test10.5 b/testsuite/expect/test10.5 index 563e009a3..8f6043a42 100755 --- a/testsuite/expect/test10.5 +++ b/testsuite/expect/test10.5 @@ -40,11 +40,15 @@ set too_small 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap format of header for displaying bg partitions # in curses format. 
# - spawn $smap -Db expect { -re "Screen is too small" { diff --git a/testsuite/expect/test10.6 b/testsuite/expect/test10.6 index 8e42cf2cd..6489c396c 100755 --- a/testsuite/expect/test10.6 +++ b/testsuite/expect/test10.6 @@ -39,11 +39,15 @@ set non_bg 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap format of header for displaying bg partitions # in curses format. # - spawn $smap -Db -c expect { -re "BG SYSTEM" { diff --git a/testsuite/expect/test10.7 b/testsuite/expect/test10.7 index 143edfd6c..43445ae03 100755 --- a/testsuite/expect/test10.7 +++ b/testsuite/expect/test10.7 @@ -40,11 +40,15 @@ set too_small 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap format of header for displaying job information # in curses format. # - spawn $smap -Dj expect { -re "Screen is too small" { diff --git a/testsuite/expect/test10.8 b/testsuite/expect/test10.8 index 7a53961ac..1a5bacb39 100755 --- a/testsuite/expect/test10.8 +++ b/testsuite/expect/test10.8 @@ -35,14 +35,19 @@ source ./globals set test_id "10.8" set exit_code 0 set matches 0 -set stuff "" +set stuff "" + print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap format of header for displaying job information # in commandline format. # - spawn $smap -Dj -c expect { -re "JOBID" { diff --git a/testsuite/expect/test10.9 b/testsuite/expect/test10.9 index 9cb315ca9..095cdd53e 100755 --- a/testsuite/expect/test10.9 +++ b/testsuite/expect/test10.9 @@ -40,6 +40,11 @@ set too_small 0 print_header $test_id +if {[file exists $smap] == 0} { + send_user "\nWARNING: smap not installed\n" + exit 0 +} + # # Check the smap --iterate option # in curses format. 
diff --git a/testsuite/expect/test11.2 b/testsuite/expect/test11.2 index 9012250c4..1fcb82291 100755 --- a/testsuite/expect/test11.2 +++ b/testsuite/expect/test11.2 @@ -92,7 +92,7 @@ got '$found_device'\n" spawn $poe hostname -rmpool $partition -euilib $mode -retry wait expect { - -re "LL_AdapterUsageMode = ($alpha_numeric)" { + -re "LL_AdapterUsageMode = ($alpha_numeric_under)" { set found_mode $expect_out(1,string) exp_continue } @@ -124,11 +124,11 @@ spawn $poe hostname -rmpool $partition -euilib $mode\ -euidevice $device -retry wait expect { - -re "LL_AdapterUsageMode = ($alpha_numeric)" { + -re "LL_AdapterUsageMode = ($alpha_numeric_under)" { set found_mode $expect_out(1,string) exp_continue } - -re "LL_AdapterUsageDevice = ($alpha_numeric)" { + -re "LL_AdapterUsageDevice = ($alpha_numeric_under)" { set found_device $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test11.7 b/testsuite/expect/test11.7 index abd36558f..a600aefdb 100755 --- a/testsuite/expect/test11.7 +++ b/testsuite/expect/test11.7 @@ -93,22 +93,22 @@ for {set i 0} {$i<2} {incr i} { set file [open $hostfile "w"] puts $file "$node1\n$node0" close $file - spawn $poe $bin_hostname -rmpool $partition -procs \ + spawn $poe $bin_printenv SLURMD_NODENAME -rmpool $partition -procs \ $num_tasks -nodes $num_nodes -retry wait \ -hostfile $hostfile } else { # # execute poe with a specific node count # - spawn $poe $bin_hostname -rmpool $partition -procs \ + spawn $poe $bin_printenv SLURMD_NODENAME -rmpool $partition -procs \ $num_tasks -nodes $num_nodes -retry wait } expect { - -re "0:($alpha_numeric)" { + -re "0:($alpha_numeric_under)" { set node0 $expect_out(1,string) exp_continue } - -re "1:($alpha_numeric)" { + -re "1:($alpha_numeric_under)" { set node1 $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test12.2 b/testsuite/expect/test12.2 index e4d9a66e3..2abedee55 100755 --- a/testsuite/expect/test12.2 +++ b/testsuite/expect/test12.2 @@ -97,7 +97,7 @@ if 
{$supported_storage == 0} { # exec $bin_rm -f $file_prog exec $bin_rm -f $file_in $file_out $file_err -exec $bin_make -f /dev/null $file_prog +exec $bin_cc -O -o $file_prog ${file_prog}.c exec $bin_chmod 700 $file_prog make_bash_script $file_in " @@ -145,20 +145,24 @@ set mem_task -1 set ave_used -1 spawn $sstat --noheader --job=$job_id.0 --fields vsize expect { - -re "($float)(\[KM\]*)/.*($number) - ($float)(\[KM\]*)" { + -re "($float)(\[KMG\]*)/.*($number) - ($float)(\[KMG\]*)" { set mem_used $expect_out(1,string) set scale1 $expect_out(2,string) set mem_task $expect_out(3,string) set ave_used $expect_out(4,string) set scale2 $expect_out(5,string) - if {[string compare $scale1 "M"] == 0} { + if {[string compare $scale1 "G"] == 0} { + set mem_used [expr $mem_used * 1024 * 1024] + } elseif {[string compare $scale1 "M"] == 0} { set mem_used [expr $mem_used * 1024] } elseif {[string compare $scale1 "K"] == 0} { set mem_used [expr $mem_used * 1] } else { set mem_used [expr $mem_used / 1024] } - if {[string compare $scale2 "M"] == 0} { + if {[string compare $scale2 "G"] == 0} { + set ave_used [expr $ave_used * 1024 * 1024] + } elseif {[string compare $scale2 "M"] == 0} { set ave_used [expr $ave_used * 1024] } elseif {[string compare $scale2 "K"] == 0} { set ave_used [expr $ave_used * 1] @@ -280,20 +284,24 @@ set mem_task -1 set ave_used -1 spawn $sacct --noheader --job=$job_id.0 --fields vsize expect { - -re "($float)(\[KM\]*)/.*($number) - ($float)(\[KM\]*)" { + -re "($float)(\[KMG\]*)/.*($number) - ($float)(\[KMG\]*)" { set mem_used $expect_out(1,string) set scale1 $expect_out(2,string) set mem_task $expect_out(3,string) set ave_used $expect_out(4,string) set scale2 $expect_out(5,string) - if {[string compare $scale1 "M"] == 0} { + if {[string compare $scale1 "G"] == 0} { + set mem_used [expr $mem_used * 1024 * 1024] + } elseif {[string compare $scale1 "M"] == 0} { set mem_used [expr $mem_used * 1024] } elseif {[string compare $scale1 "K"] == 0} { set mem_used [expr 
$mem_used * 1] } else { set mem_used [expr $mem_used / 1024] } - if {[string compare $scale2 "M"] == 0} { + if {[string compare $scale2 "G"] == 0} { + set ave_used [expr $ave_used * 1024 * 1024] + } elseif {[string compare $scale2 "M"] == 0} { set ave_used [expr $ave_used * 1024] } elseif {[string compare $scale2 "K"] == 0} { set ave_used [expr $ave_used * 1] @@ -343,7 +351,7 @@ if {$aix == 1 && $exit_code == 1} { } if {$exit_code == 0} { - exec rm -f $file_prog + exec rm -f $file_prog $file_in $file_out $file_err send_user "\nSUCCESS\n" } exit $exit_code diff --git a/testsuite/expect/test15.11 b/testsuite/expect/test15.11 index 845f0b2e6..52abcb841 100755 --- a/testsuite/expect/test15.11 +++ b/testsuite/expect/test15.11 @@ -39,6 +39,12 @@ set name_read "" print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Test setting job's name and get priority # diff --git a/testsuite/expect/test15.12 b/testsuite/expect/test15.12 index 2b95da58a..5c8b25bf6 100755 --- a/testsuite/expect/test15.12 +++ b/testsuite/expect/test15.12 @@ -38,6 +38,12 @@ set file_in "test$test_id.input" print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Submit a job with invalid constraint requirement # diff --git a/testsuite/expect/test15.13 b/testsuite/expect/test15.13 index 3015134dc..cb65cd3c5 100755 --- a/testsuite/expect/test15.13 +++ b/testsuite/expect/test15.13 @@ -56,13 +56,14 @@ array set good_vars { # Spawn a job via salloc to print environment variables # set job_id 0 +set timeout $max_job_delay set salloc_pid [spawn $salloc -N1 -t1 -v $bin_env] expect { -re "Granted job allocation ($number)" { set job_id $expect_out(1,string) exp_continue } - -re "(SLURM_$alpha_under)=($alpha_numeric)" { + -re 
"(SLURM_$alpha_under)=($alpha_numeric_under)" { set found_vars($expect_out(1,string)) "$expect_out(2,string)" exp_continue } diff --git a/testsuite/expect/test15.17 b/testsuite/expect/test15.17 index 2adfadf81..a6ae14ac2 100755 --- a/testsuite/expect/test15.17 +++ b/testsuite/expect/test15.17 @@ -45,7 +45,7 @@ print_header $test_id # Build input script file # exec $bin_rm -f $file_in -make_bash_script $file_in "$bin_hostname" +make_bash_script $file_in "$bin_printenv SLURMD_NODENAME" # # Spawn a sbatch job that uses stdout/err and confirm their contents diff --git a/testsuite/expect/test15.22 b/testsuite/expect/test15.22 index 63aaaa4b4..47004d8f7 100755 --- a/testsuite/expect/test15.22 +++ b/testsuite/expect/test15.22 @@ -47,7 +47,7 @@ print_header $test_id # spawn $sinfo --summarize expect { - -re "($end_of_line)($alpha_numeric)(\[ \*\]) *up" { + -re "($end_of_line)($alpha_numeric_under)(\[ \*\]) *up" { if (![string compare $expect_out(3,string) "*"]) { set def_part_name $expect_out(2,string) } else { @@ -99,7 +99,7 @@ if {$job_id == 0} { set read_part "" spawn $scontrol show job $job_id expect { - -re "Partition=($alpha_numeric)" { + -re "Partition=($alpha_numeric_under)" { set read_part $expect_out(1,string) exp_continue } @@ -157,7 +157,7 @@ if {$job_id == 0} { set read_part "" spawn $scontrol show job $job_id expect { - -re "Partition=($alpha_numeric)" { + -re "Partition=($alpha_numeric_under)" { set read_part $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test16.4 b/testsuite/expect/test16.4 index eede3faef..217c4ed48 100755 --- a/testsuite/expect/test16.4 +++ b/testsuite/expect/test16.4 @@ -49,7 +49,7 @@ if { [test_xcpu] } { # Delete left-over program and rebuild it # exec $bin_rm -f $file_prog -exec $bin_make -f /dev/null $file_prog +exec $bin_cc -O -o $file_prog ${file_prog}.c exec $bin_chmod 700 $file_prog # diff --git a/testsuite/expect/test17.12 b/testsuite/expect/test17.12 index da5499ef6..afd788900 100755 --- 
a/testsuite/expect/test17.12 +++ b/testsuite/expect/test17.12 @@ -35,8 +35,15 @@ source ./globals set test_id "17.12" set exit_code 0 set file_in "test$test_id.input" + print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Delete left-over input script # Build input script file @@ -60,6 +67,7 @@ expect { } -re "Submitted batch job ($number)" { send_user "\nFAILURE: sbatch job submitted with invalid constraint\n" + cancel_job $expect_out(1,string) set exit_code 1 exp_continue } diff --git a/testsuite/expect/test17.13 b/testsuite/expect/test17.13 index 30f7b2952..2b2f35bfd 100755 --- a/testsuite/expect/test17.13 +++ b/testsuite/expect/test17.13 @@ -109,7 +109,7 @@ if {[wait_for_file $file_out] != 0} { spawn $bin_cat $file_out expect { - -re "(SLURM_$alpha_under)=($alpha_numeric)" { + -re "(SLURM_$alpha_under)=($alpha_numeric_under)" { set found_vars($expect_out(1,string)) "$expect_out(2,string)" exp_continue } diff --git a/testsuite/expect/test17.15 b/testsuite/expect/test17.15 index 2bfa3b89a..7bbf9bb32 100755 --- a/testsuite/expect/test17.15 +++ b/testsuite/expect/test17.15 @@ -56,7 +56,7 @@ print_header $test_id # of the ulimit program is inconsistent across systems. 
# exec $bin_rm -f $file_prog_get $file_err $file_in $file_out -exec $bin_make -f /dev/null $file_prog_get +exec $bin_cc -O -o $file_prog_get ${file_prog_get}.c # # Get our current limits and adjust targets accordingly diff --git a/testsuite/expect/test17.17 b/testsuite/expect/test17.17 index 105e0b642..fd9bba8cb 100755 --- a/testsuite/expect/test17.17 +++ b/testsuite/expect/test17.17 @@ -48,7 +48,7 @@ print_header $test_id # Submit a job and get the node's NodeName from the nodelist # set timeout $max_job_delay -set srun_pid [spawn $srun -v -N1 -l -t1 $bin_hostname] +set srun_pid [spawn $srun -v -N1 -l -t1 $bin_printenv SLURMD_NODENAME] expect { -re "on host ($alpha_numeric_under)," { set nodelist_name $expect_out(1,string) diff --git a/testsuite/expect/test17.19 b/testsuite/expect/test17.19 index 1be95bb18..d44849c4c 100755 --- a/testsuite/expect/test17.19 +++ b/testsuite/expect/test17.19 @@ -44,7 +44,7 @@ print_header $test_id # Build input script file # exec $bin_rm -f $file_in -make_bash_script $file_in "$bin_hostname" +make_bash_script $file_in "$bin_printenv SLURMD_NODENAME" # # Spawn a batch job that uses stdout/err and confirm their contents diff --git a/testsuite/expect/test17.23 b/testsuite/expect/test17.23 index 9eb4646f8..fe78c87dd 100755 --- a/testsuite/expect/test17.23 +++ b/testsuite/expect/test17.23 @@ -43,6 +43,12 @@ set job_prio3 0 print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Build input script file # diff --git a/testsuite/expect/test17.31 b/testsuite/expect/test17.31 index 4f9daf676..70c9cfe87 100755 --- a/testsuite/expect/test17.31 +++ b/testsuite/expect/test17.31 @@ -54,7 +54,7 @@ if {$available < $node_cnt} { log_user 0 set scontrol_pid [spawn $scontrol show nodes] expect { - -re "CPUS=($number)" { + -re " CPUs=($number)" { if {$expect_out(1,string) < $ppn_cnt} { set ppn_cnt 
$expect_out(1,string) } diff --git a/testsuite/expect/test19.3 b/testsuite/expect/test19.3 index eb5c7f2a6..bbfa85086 100755 --- a/testsuite/expect/test19.3 +++ b/testsuite/expect/test19.3 @@ -72,7 +72,7 @@ set disabled 0 set matches 0 set strigger_pid [spawn $strigger --set -v --down --program=$cwd/$file_in] expect { - -re "Operation not permitted" { + -re "permission denied" { set disabled 1 exp_continue } diff --git a/testsuite/expect/test19.4 b/testsuite/expect/test19.4 index cfe61d7ad..47059583d 100755 --- a/testsuite/expect/test19.4 +++ b/testsuite/expect/test19.4 @@ -72,7 +72,7 @@ set disabled 0 set matches 0 set strigger_pid [spawn $strigger --set -v -offset=0 --reconfig --program=$cwd/$file_in] expect { - -re "Operation not permitted" { + -re "permission denied" { set disabled 1 exp_continue } diff --git a/testsuite/expect/test19.5 b/testsuite/expect/test19.5 index 39f40b68f..c64181b24 100755 --- a/testsuite/expect/test19.5 +++ b/testsuite/expect/test19.5 @@ -105,7 +105,7 @@ set disabled 0 set matches 0 set strigger_pid [spawn $strigger --set -v --time --jobid=$job_id --offset=-90 --program=$cwd/$file_in_time] expect { - -re "Operation not permitted" { + -re "permission denied" { set disabled 1 exp_continue } diff --git a/testsuite/expect/test19.6 b/testsuite/expect/test19.6 index 9e871ca97..8b12a62c6 100755 --- a/testsuite/expect/test19.6 +++ b/testsuite/expect/test19.6 @@ -104,7 +104,7 @@ set disabled 0 set matches 0 set strigger_pid [spawn $strigger --set -v --time --jobid=$job_id --offset=-30 --program=$cwd/$file_in_time] expect { - -re "Operation not permitted" { + -re "permission denied" { set disabled 1 exp_continue } diff --git a/testsuite/expect/test19.7 b/testsuite/expect/test19.7 index d4f809814..a2e454c0c 100755 --- a/testsuite/expect/test19.7 +++ b/testsuite/expect/test19.7 @@ -98,7 +98,7 @@ set disabled 0 set matches 0 set strigger_pid [spawn $strigger --set -v --idle --offset=1 --program=$cwd/$file_in_idle] expect { - -re "Operation not 
permitted" { + -re "permission denied" { set disabled 1 exp_continue } diff --git a/testsuite/expect/test21.1 b/testsuite/expect/test21.1 index e2e217a3e..3a142cc82 100755 --- a/testsuite/expect/test21.1 +++ b/testsuite/expect/test21.1 @@ -41,7 +41,7 @@ print_header $test_id # # Report the sacctmgr usage format # - + spawn $sacctmgr --usage expect { -re "commit changes immediately" { diff --git a/testsuite/expect/test21.10 b/testsuite/expect/test21.10 new file mode 100755 index 000000000..e40d6a52d --- /dev/null +++ b/testsuite/expect/test21.10 @@ -0,0 +1,244 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr add an account +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.10" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 +set acc account +set ass associations +set nm1 testaccta1 +set des Description +set ds1 "Test Account A1" +set org Organization +set or1 "Account Org A1" +set qs QosLevel +set qs1 normal +set access_err 0 + +print_header $test_id + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add an account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" 
$fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test associations +# +set as_list_pid [spawn $sacctmgr list $ass $acc=$nm1 ] +expect { + -re "$tc1 *$nm1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] +expect { + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test associations +# +set as2_list_pid [spawn $sacctmgr list $ass $acc=$nm1 ] +expect { + -re "$tc1 *$nm1" { + exp_continue + 
send_user "\nFAILURE: test associations still exist.\n" + set exit_code 1 + } + timeout { + send_user "\nTest associations delete verified.\n" + slow_kill $as2_list_pid + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test21.11 b/testsuite/expect/test21.11 new file mode 100755 index 000000000..3411f0b80 --- /dev/null +++ b/testsuite/expect/test21.11 @@ -0,0 +1,275 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr add multiple accounts +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.11" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set almatches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 +set acc account +set ass associations +set nm1 testaccta1 +set nm2 testaccta2 +set des Description +set ds1 "Test Account A1" +set org Organization +set or1 "Account Org A1" +set qs QosLevel +set qs1 normal +set access_err 0 + +print_header $test_id + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add an account +# +set 
sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1,$nm2 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test associations +# +set as_list_pid [spawn $sacctmgr list $ass] +expect { + -re "$tc1 *$nm1" { + incr almatches + exp_continue + } + -re "$tc1 *$nm2" { + incr almatches + exp_continue + } + -re "$tc2 *$nm1" { + set exit_code 1 + send_user "\nFAILURE: $nm1 account erroneously added to $tc2\n" + exp_continue + } + -re "$tc2 *$nm2" { + exp_continue + send_user "\nFAILURE: $nm2 account erroneously added to $tc2\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$almatches != 2} { + send_user "\nFAILURE: Added associations not found.\n" + send_user "\n Need 2 matches and got $almatches instead.\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# 
+set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2] +expect { + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test associations +# +set as2_list_pid [spawn $sacctmgr list $ass $clu=$tc1 ] +expect { + -re "$tc1 *$nm1" { + exp_continue + send_user "\nFAILURE: test associations still exist.\n" + set exit_code 1 + } + -re "$tc1 *$nm2" { + exp_continue + send_user "\nFAILURE: test associations still exist.\n" + set exit_code 1 + } + timeout { + send_user "\nTest associations delete verified.\n" + slow_kill $as2_list_pid + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + } else { + send_user "\nFAILURE\n" + } + +exit $exit_code diff --git a/testsuite/expect/test21.12 b/testsuite/expect/test21.12 new file mode 100755 index 000000000..a7a363bbe --- /dev/null +++ b/testsuite/expect/test21.12 @@ -0,0 +1,305 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr list accounts +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. 
+# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.12" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set almatches 0 +set aclmatches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 +set acc account +set ass associations +set nm1 testaccta1 +set nm2 testaccta2 +set des Description +set ds1 testaccounta1 +set org Organization +set or1 accountorga1 +set qs QosLevel +set qs1 normal +set access_err 0 + +print_header $test_id + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + 
exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add an account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1,$nm2 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test associations +# +set as_list_pid [spawn $sacctmgr list $ass] +expect { + -re "$tc1 *$nm1" { + incr almatches + exp_continue + } + -re "$tc1 *$nm2" { + incr almatches + exp_continue + } + -re "$tc2 *$nm1" { + set exit_code 1 + send_user "\nFAILURE: $nm1 account erroneously added to $tc2\n" + exp_continue + } + -re "$tc2 *$nm2" { + exp_continue + send_user "\nFAILURE: $nm2 account erroneously added to $tc2\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$almatches != 2} { + send_user "\nFAILURE: Added associations not found.\n" + send_user "\n Need 2 matches and got $almatches instead.\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test accounts +# +set 
ac_list_pid [spawn $sacctmgr -n list $acc] +expect { + -re "$nm1 *$ds1 *$or1 *$qs1" { + incr aclmatches + exp_continue + } + -re "$nm2 *$ds1 *$or1 *$qs1" { + incr aclmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $ac_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aclmatches != 2} { + send_user "\nFAILURE: Added account incorrect.\n" + send_user "\n Need 2 matches and got $aclmatches instead.\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2] +expect { + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test associations +# +set as2_list_pid [spawn $sacctmgr list $ass $clu=$tc1 ] +expect { + -re "$tc1 *$nm1" { + exp_continue + send_user "\nFAILURE: test associations still exist.\n" + set exit_code 1 + } + -re "$tc1 *$nm2" { + exp_continue + send_user "\nFAILURE: test associations still exist.\n" + set exit_code 1 + } + timeout { + send_user "\nTest associations delete verified.\n" + slow_kill $as2_list_pid + } + eof { + wait + } +} + +if { 
![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + } else { + send_user "\nFAILURE\n" + } + +exit $exit_code diff --git a/testsuite/expect/test21.13 b/testsuite/expect/test21.13 new file mode 100755 index 000000000..c6b907428 --- /dev/null +++ b/testsuite/expect/test21.13 @@ -0,0 +1,629 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify account, descriptions, limits +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "21.13" +set exit_code 0 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1700 +set mc1 1000000 +set mc2 700000 +set mj1 50 +set mj2 70 +set mn1 300 +set mn2 700 +set mw1 01:00:00 +set mw2 00:07:00 +set acc account +set ass associations +set nm1 testaccta1 +set nm2 testaccta2 +set nm3 testaccta3 +set des Description +set ds1 testaccounta1 +set ds2 testacct +set org Organization +set or1 accountorga1 +set or2 acttrg +set qs QosLevel +set qs1 normal +set par parent +set access_err 0 + +print_header $test_id + +set timeout 60 + +# +# Check accounting config and bail if not found. +# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + + +# +# Use sacctmgr to create a cluster +# +proc _add_cluster {name} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i add cluster $name] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Cluster" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $matches\n" + incr 
exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + + return $exit_code +} + +# +# Use sacctmgr to remove the test cluster +# +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +# +# Use sacctmgr to add an account +# +proc _add_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 + } + + set command "$name" + + if { 
[string length $cluster] } { + set command "$command cluster=$cluster" + } + + set my_pid [eval spawn $sacctmgr -i add account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Account" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code +} + +# +# Use sacctmgr to remove an account +# +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" + } + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +# +# Use sacctmgr to modify an account +# +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 + } + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" + } + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" + } + + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" + } + + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 + } + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 + } + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 + } + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 + } + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 + } + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 + } + + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user 
"FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. + got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_cluster "$tc1,$tc2" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +_remove_acct "" "$nm1,$nm2,$nm3" + +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2"] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +#add accounts +incr exit_code [_add_acct "$tc1,$tc2" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# OK This is the real test, first test change the desc and org of the accounts +incr exit_code [_mod_acct "" "$nm1,$nm2,$nm3" $ds2 $or2 "" "" "" "" "" "" "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p list account acc=$nm1,$nm2,$nm3 
format="Account,Desc,Org"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($nm1|$nm2|$nm3).$ds2.$or2." { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} +if {$matches != 3} { + send_user "\nFAILURE: Account modification 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# Next, test change the limits of the accounts +incr exit_code [_mod_acct "" "$nm1" "" "" "" $fs2 $mc2 $mj2 $mn2 $mw2 "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p list assoc acc=$nm1,$nm2,$nm3 format="Account,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($nm1.($tc1|$tc2).$fs2.$mc2.$mj2.$mn2.$mw2.|($nm2|$nm3).($tc1|$tc2).1)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 6} { + send_user "\nFAILURE: Account modification 2 incorrect with only $matches.\n" + incr exit_code 1 +} + +# Next, test change the desc and limits of the accounts +incr exit_code [_mod_acct "" "$nm1" $ds1 $or1 "" -1 -1 -1 -1 -1 "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [eval spawn $sacctmgr -n -p list acct withassoc acc=$nm1,$nm2,$nm3 format="Account,Desc,Org,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem 
with the sacctmgr command\n" + incr exit_code 1 + } + # Any time you use () around something you need to combine all + # those things together since you can miss some and they be thrown away + -re "($nm1.$ds1.$or1.($tc1|$tc2).1.....|($nm2|$nm3).$ds2.$or2.($tc1|$tc2).1.....)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 6} { + send_user "\nFAILURE: HERE ??? Account modification 2 incorrect with $matches.\n" + incr exit_code 1 +} + +# This is the end below here + +incr exit_code [_remove_acct "$tc1,$tc2" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_cluster "$tc1,$tc2"] + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" +} +exit $exit_code + diff --git a/testsuite/expect/test21.14 b/testsuite/expect/test21.14 new file mode 100755 index 000000000..b73c438cc --- /dev/null +++ b/testsuite/expect/test21.14 @@ -0,0 +1,750 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify accounts, descriptions, limits, +# and parents down and up the tree +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.14" +set exit_code 0 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1700 +set mc1 1000000 +set mc2 700000 +set mj1 50 +set mj2 70 +set mn1 300 +set mn2 700 +set mw1 01:00:00 +set mw2 00:07:00 +set acc account +set ass associations +set nm1 testaccta1 +set nm2 testaccta2 +set nm3 testaccta3 +set des Description +set ds1 testaccounta1 +set ds2 testacct +set org Organization +set or1 accountorga1 +set or2 acttrg +set qs QosLevel +set qs1 normal +set par parent +set access_err 0 + +print_header $test_id + +set timeout 60 + +# +# Check accounting config and bail if not found. 
+# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + + +# +# Use sacctmgr to create a cluster +# +proc _add_cluster {name} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i add cluster $name] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Cluster" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + + return $exit_code +} + +# +# Use sacctmgr to remove the test cluster +# +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the 
sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +# +# Use sacctmgr to add an account +# +proc _add_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 + } + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + } + + set my_pid [eval spawn $sacctmgr -i add account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Account" { + incr matches + exp_continue 
+ } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code +} + +# +# Use sacctmgr to remove an account +# +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" + } + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +# +# Use sacctmgr to modify an account +# +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 + } + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" + } + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" + } + + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" + } + + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 + } + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 + } + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 + } + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 + } + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 + } + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 + } + + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user 
"FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. + got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_cluster "$tc1,$tc2" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +_remove_acct "" "$nm1,$nm2,$nm3" + +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2"] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +#add accounts +incr exit_code [_add_acct "$tc1,$tc2" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# OK This is the real test, first test change the desc and org of the accounts +incr exit_code [_mod_acct "" "$nm1,$nm2,$nm3" $ds2 $or2 "" "" "" "" "" "" "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p list account acc=$nm1,$nm2,$nm3 
format="Account,Desc,Org"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($nm1|$nm2|$nm3).$ds2.$or2." { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} +if {$matches != 3} { + send_user "\nFAILURE: Account modification 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# Next, test change the limits of the accounts +incr exit_code [_mod_acct "" "$nm1,$nm2,$nm3" "" "" "" $fs2 $mc2 $mj2 $mn2 $mw2 "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p list assoc acc=$nm1,$nm2,$nm3 format="Account,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($nm1|$nm2|$nm3).($tc1|$tc2).$fs2.$mc2.$mj2.$mn2.$mw2." 
{ + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} +if {$matches != 6} { + send_user "\nFAILURE: Account modification 2 incorrect with only $matches.\n" + incr exit_code 1 +} + +# Next, test change the desc and limits of the accounts +incr exit_code [_mod_acct "" "$nm1,$nm3" $ds1 $or1 "" -1 -1 -1 -1 -1 "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [eval spawn $sacctmgr -n -p list acct withassoc acc=$nm1,$nm2,$nm3 format="Account,Desc,Org,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + # Any time you use () around something you need to combine all + # those things together since you can miss some and they be thrown away + -re "(($nm1|$nm3).$ds1.$or1.($tc1|$tc2).1.....|$nm2.$ds2.$or2.($tc1|$tc2).$fs2.$mc2.$mj2.$mn2.$mw2.)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} + +if {$matches != 6} { + send_user "\nFAILURE: Account modification 2 incorrect with $matches.\n" + incr exit_code 1 +} + +# Next, test change the parent of $n3 to be $n2 on $tc1 sibling to be a child +incr exit_code [_mod_acct "$tc1" "$nm3" "" "" "$nm2" "" "" "" "" "" "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [eval spawn $sacctmgr -n -p list assoc acc=$nm3 format="Account,ParentN,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem 
with the sacctmgr command\n" + incr exit_code 1 + } + -re "$nm3.$nm2.$tc1.1.$mc2.$mj2.$mn2.$mw2." { + incr matches + exp_continue + } + -re "$nm3.root.$tc2.1....." { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} +if {$matches != 2} { + send_user "\nFAILURE: Account modification 3 incorrect with $matches.\n" + incr exit_code 1 +} + +# Next, test change the parent of $n2 to be $n3 on $tc1 +# making the child the parent +incr exit_code [_mod_acct "$tc1" "$nm2" "" "" "$nm3" "" "" "" "" "" "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [eval spawn $sacctmgr -n -p list assoc acc=$nm3,$nm2 format="Account,ParentN,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($nm2.root.$tc2.$fs2.$mc2.$mj2.$mn2.$mw2.|$nm2.$nm3.$tc1.$fs2.$mc2.$mj2.$mn2.$mw2.|$nm3.root.($tc1|$tc2).1.....)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} +if {$matches != 4} { + send_user "\nFAILURE: Account modification 3 incorrect with $matches.\n" + incr exit_code 1 +} + + +# Next, test change the parent of $n3 to be $n2 on $tc1 again +# only this time the parent will become the child instead of it being a sibling. 
+incr exit_code [_mod_acct "$tc1" "$nm3" "" "" "$nm2" "" "" "" "" "" "" ""] +if { $exit_code } { + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [eval spawn $sacctmgr -n -p list assoc acc=$nm3,$nm2 format="Account,ParentN,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($nm2.root.($tc1|$tc2).$fs2.$mc2.$mj2.$mn2.$mw2.|$nm3.$nm2.$tc1.1.$mc2.$mj2.$mn2.$mw2.|$nm3.root.$tc2.1.....)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } +} +if {$matches != 4} { + send_user "\nFAILURE: Account modification 3 incorrect with $matches.\n" + incr exit_code 1 +} + + + + + + + +# This is the end below here + +incr exit_code [_remove_acct "$tc1,$tc2" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_cluster "$tc1,$tc2"] + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" +} +exit $exit_code + diff --git a/testsuite/expect/test21.15 b/testsuite/expect/test21.15 new file mode 100755 index 000000000..4cd661089 --- /dev/null +++ b/testsuite/expect/test21.15 @@ -0,0 +1,291 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr add an user +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. 
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.15" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set dumatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set fs1 2500 +set fs2 2200 +set mc1 1000000 +set mc2 200000 +set mj1 50 +set mj2 20 +set mn1 300 +set mn2 200 +set mw1 01:00:00 +set mw2 00:20:00 +set acc account +set acs accounts +set ass associations +set nm1 testaccta1 +set nm2 tacct2 +set des Description +set ds1 "Test Account A1" +set ds1 "TestAccount2" +set org Organization +set or1 "Account Org A1" +set or1 "AcctOrg2" +set qs QosLevel +set qs1 normal +set usr user +set us1 tuser1 +set al AdminLevel +set aln None +set dac DefaultAccount +set par Partitions +set dbu debug +set access_err 0 + +print_header 
$test_id + +if { 0 == 0 } { + send_user "\n This test is not ready\n" + exit 0 +} + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add an account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a user +# +set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$aln $clu=$tc1 $dac=$nm1 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us1 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test user +# +set as_list_pid [spawn $sacctmgr list $usr $nams=$us1 WithAssocs] +expect { + -re "$tc1 *$nm1 *$qs1 *None *$tc1 *$nm1 *$par *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test user +# +set sadel_pid [spawn $sacctmgr -i $del $usr $us1] +expect { + -re "Deleting users" { + incr dumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dumatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn 
$sacctmgr -i $del $clu $tc1] +expect { + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + } else { + send_user "\nFAILURE\n" + } + +exit $exit_code diff --git a/testsuite/expect/test21.16 b/testsuite/expect/test21.16 new file mode 100755 index 000000000..ddb8b5b20 --- /dev/null +++ b/testsuite/expect/test21.16 @@ -0,0 +1,363 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr add and list multiple users +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. 
+# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.16" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set aa2matches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set dumatches 0 +set dlumatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tclus1 +set tc2 tclus2 +set tc3 tclus3 +set fs1 2500 +set fs2 2200 +set mc1 1000000 +set mc2 200000 +set mj1 50 +set mj2 20 +set mn1 300 +set mn2 200 +set mw1 01:00:00 +set mw2 00:20:00 +set acc account +set acs accounts +set ass associations +set nm1 testaccta1 +set nm2 tacct2 +set des Description +set ds1 "Test Account A1" +set ds1 "TestAccount2" +set org Organization +set or1 "Account Org A1" +set or1 "AcctOrg2" +set qs QosLevel +set qs1 normal +set usr user +set us1 tuser1 +set us2 tuser2 +set al AdminLevel +set aln None +set dac DefaultAccount +set par Partitions +set dbu debug +set access_err 0 + +print_header $test_id + +if { 0 == 0 } { + send_user "\n This test is not ready\n" + exit 0 +} + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set 
access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add an account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a second account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc2 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm2 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aa2matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aa2matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a user +# +set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$aln $clu=$tc1 $dac=$nm2 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$us1 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a second user +# +set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1,$nm2 $al=$aln $clu=$tc1,$tc2 $dac=$nm1 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us2 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm2" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test user +# +#exp_internal 1 +set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] +expect { + -re "$us1 *$nm2 *$qs1 *$aln *$tc1 *$nm1 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$aln *$tc1 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$aln *$tc2 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$aln *$tc2 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dlumatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem listing user got $dumatches\n" + set exit_code 1 +} +#exp_internal 0 
+# +# Use sacctmgr to delete the test user +# +set sadel_pid [spawn $sacctmgr -i $del $usr $us1,$us2] +expect { + -re "Deleting users" { + incr dumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dumatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + } else { + send_user "\nFAILURE\n" + } + +exit $exit_code diff --git a/testsuite/expect/test21.17 b/testsuite/expect/test21.17 new file mode 100755 index 000000000..3f1b47d9a --- /dev/null +++ b/testsuite/expect/test21.17 @@ -0,0 +1,397 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM 
functionality +# sacctmgr modify a user +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "21.17" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set aa2matches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set dumatches 0 +set dlumatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tclus1 +set tc2 tclus2 +set tc3 tclus3 +set fs1 2500 +set fs2 1200 +set mc1 1000000 +set mc2 200000 +set mj1 100 +set mj2 50 +set mn1 300 +set mn2 200 +set mw1 01:00:00 +set mw2 02:00:00 +set acc account +set acs accounts +set ass associations +set nm1 tacct1 +set nm2 tacct2 +set des Description +set ds1 "onestestAccount" +set ds2 "testaccount2" +set org Organization +set or1 "oneaccountOrg" +set or2 "acctorg2" +set qs QosLevel +set qs1 normal +set usr user +set us1 tuser1 +set us2 tuser2 +set al AdminLevel +set aln None +set ala Admin +set alo Operator +set dac DefaultAccount +set par Partitions +set dbu debug +set access_err 0 + +print_header $test_id + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { 
![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add test accounts +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a second account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des=$ds2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$nm2 $org=$or2 $qs=$qs1] +expect { + -re "Adding Account" { + incr aa2matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aa2matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a user +# +set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$alo $clu=$tc1,$tc2 $dac=$nm2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us1,$us2 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test user +# +set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] +expect { + -re "$us1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to modify user +# +set as_list_pid [spawn $sacctmgr -i $mod $usr set $al=$ala $dac=$nm1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $qs=$qs1 where $acs=$nm2 $al=$alo $clu=$tc2 $dac=$nm2 $nams=$us2 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm2" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test user +# +set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] +expect { + -re "$us1 *$nm2 *$qs1 *$alo *$tc1 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us1 *$nm2 *$qs1 *$alo *$tc1 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us1 *$nm2 *$qs1 *$alo *$tc2 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us1 *$nm2 *$qs1 *$alo *$tc2 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + 
incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$ala.* *$tc1 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$ala.* *$tc1 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$ala.* *$tc2 *$nm2 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr dlumatches + exp_continue + } + -re "$us2 *$nm1 *$qs1 *$ala.* *$tc2 *$nm1 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr dlumatches + exp_continue + } + -re "$tc3" { + send_user "\nFAILURE: $tc3 found but not expected\n" + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dlumatches != 8} { + send_user "\nFAILURE: sacctmgr had a problem listing user got $dumatches of 8\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test user +# +set sadel_pid [spawn $sacctmgr -i $del $usr $us1,$us2] +expect { + -re "Deleting users" { + incr dumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dumatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] 
+expect { + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} else { + send_user "\nFAILURE\n" +} + +exit $exit_code diff --git a/testsuite/expect/test21.18 b/testsuite/expect/test21.18 new file mode 100755 index 000000000..d7dd326f2 --- /dev/null +++ b/testsuite/expect/test21.18 @@ -0,0 +1,397 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify multiple users +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.18" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set aa2matches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set dumatches 0 +set dlumatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tclus1 +set tc2 tclus2 +set tc3 tclus3 +set fs1 2500 +set fs2 1200 +set mc1 1000000 +set mc2 200000 +set mj1 100 +set mj2 50 +set mn1 300 +set mn2 200 +set mw1 01:00:00 +set mw2 02:00:00 +set acc account +set acs accounts +set ass associations +set nm1 tacct1 +set nm2 tacct2 +set des Description +set ds1 "onestestAccount" +set ds2 "testaccount2" +set org Organization +set or1 "oneaccountOrg" +set or2 "acctorg2" +set qs QosLevel +set qs1 normal +set usr user +set us1 tuser1 +set us2 tuser2 +set al AdminLevel +set aln None +set ala Administrator +set alo Operator +set dac DefaultAccount +set par Partitions +set dbu debug +set access_err 0 + +print_header $test_id + +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + 
} +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add test accounts +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a second account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des=$ds2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$nm2 $org=$or2 $qs=$qs1] +expect { + -re "Adding Account" { + incr aa2matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aa2matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a user +# +set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$alo $clu=$tc1,$tc2 $dac=$nm2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us1,$us2 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test user +# +set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] +expect { + -re "$us1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to modify user +# +set as_list_pid [spawn $sacctmgr -i $mod $usr set $al=$ala $dac=$nm1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $qs=$qs1 where $acs=$nm2 $al=$alo $clu=$tc2 $dac=$nm2 $nams=$us1,$us2 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm2" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} +# +# Use sacctmgr to list the test user +# +set as_list_pid [spawn $sacctmgr -n -p list $usr $nams=$us1,$us2 WithAssocs] +expect { + -re "$us1.$nm1.$qs1.$ala.$tc1.$nm2.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" { + incr dlumatches + exp_continue + } + -re "$us1.$nm1.$qs1.$ala.$tc1.$nm1.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" { + incr dlumatches + exp_continue + } + -re "$us1.$nm1.$qs1.$ala.$tc2.$nm2.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" { + incr dlumatches + exp_continue + } + -re "$us1.$nm1.$qs1.$ala.$tc2.$nm1.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" { + incr dlumatches + exp_continue + } 
+ -re "$us2.$nm1.$qs1.$ala.$tc1.$nm2.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" { + incr dlumatches + exp_continue + } + -re "$us2.$nm1.$qs1.$ala.$tc1.$nm1.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" { + incr dlumatches + exp_continue + } + -re "$us2.$nm1.$qs1.$ala.$tc2.$nm2.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" { + incr dlumatches + exp_continue + } + -re "$us2.$nm1.$qs1.$ala.$tc2.$nm1.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" { + incr dlumatches + exp_continue + } + -re "$tc3" { + send_user "\nFAILURE: $tc3 found but not expected\n" + exp_continue + set exit_code 1 + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dlumatches != 8} { + send_user "\nFAILURE: sacctmgr had a problem finding all user listing getting $dlumatches instead of 8\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test user +# +set sadel_pid [spawn $sacctmgr -i $del $usr $us1,$us2] +expect { + -re "Deleting users" { + incr dumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dumatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr 
dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + } else { + send_user "\nFAILURE\n" + } + +exit $exit_code diff --git a/testsuite/expect/test21.19 b/testsuite/expect/test21.19 new file mode 100755 index 000000000..789a61d9f --- /dev/null +++ b/testsuite/expect/test21.19 @@ -0,0 +1,361 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr add/delete coordinator +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. 
+# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.19" +set exit_code 0 +set acmatches 0 +set aamatches 0 +set camatches 0 +set cumatches 0 +set lmatches 0 +set damatches 0 +set dcmatches 0 +set dumatches 0 +set not_support 0 +set add add +set lis list +set del delete +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 +set acc account +set acs accounts +set ass associations +set nm1 tacct1 +set des Description +set ds1 "tacct1" +set org Organization +set or1 "acctorg2" +set qs QosLevel +set qs1 normal +set usr user +set us1 tuser1 +set al AdminLevel +set aln None +set dac DefaultAccount +set cor Coordinator +set par Partition +set dbu debug +set access_err 0 + +print_header $test_id + +# +# Check accounting config and bail if not found. 
+# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } + -re "Adding Cluster" { + incr acmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} +if {$acmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $acmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add an account +# +set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] +expect { + -re "Adding Account" { + incr aamatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$aamatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a user +# +set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$aln $clu=$tc1 $dac=$nm1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$us1 $par=$dbu $qs=$qs1 ] +expect { + -re "$nams *$nm1" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to add a coordinator +# +set as_list_pid [spawn $sacctmgr -i $add $cor $acs=$nm1 $nams=$us1 ] +expect { + -re "Adding *$cor" { + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test coordinator user +# +set as_list_pid [spawn $sacctmgr -n list $usr withcoor WithAssocs] +expect { + -re "$us1 *$nm1 *$qs1 *None *$tc1 *$nm1 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1 *$nm1" { + incr cumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list user not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$cumatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem finding coordinator user. + got $aamatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the test coordinator account. 
+# +set as_list_pid [spawn $sacctmgr -n list $acc withcoor WithAssocs] +expect { + -re "$nm1 *$ds1 *$or1 *$qs1 *$tc1 *root *$fs1 *$mc1 *$mj1 *$mn1 *$mw1 $us1" { + incr camatches + exp_continue + } + -re "$nm1 *$ds1 *$or1 *$qs1 *$tc1 *$us1 *$fs1 *$mc1 *$mj1 *$mn1 *$mw1 $us1" { + incr camatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list account not responding\n" + slow_kill $as_list_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$camatches != 2} { + send_user "\nFAILURE: sacctmgr had a problem finding coordinator account.\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test coordinator +# +set sadel_pid [spawn $sacctmgr -i $del $cor $us1] +expect { + -re "Deleting users" { + incr dumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test user +# +set sadel_pid [spawn $sacctmgr -i $del $usr $us1] +expect { + -re "Deleting users" { + incr dumatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dumatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test account +# +set sadel_pid [spawn $sacctmgr -i $del $acc $nm1] +expect { + -re "Deleting account" { + incr damatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$damatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] +expect 
{ + -re "Deleting clusters" { + incr dcmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dcmatches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" + } else { + send_user "\nFAILURE\n" + } + +exit $exit_code diff --git a/testsuite/expect/test21.4 b/testsuite/expect/test21.4 index fc088c5d2..bd7ecca81 100755 --- a/testsuite/expect/test21.4 +++ b/testsuite/expect/test21.4 @@ -42,7 +42,7 @@ print_header $test_id # Report the sacctmgr version keyword # -spawn $sacctmgr version +spawn $sacctmgr --version expect { -re "slurm \[0-9]*.\[0-9]*.\[0-9]*" { incr matches diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5 index 60f8c6d93..2a81fc0c1 100755 --- a/testsuite/expect/test21.5 +++ b/testsuite/expect/test21.5 @@ -55,15 +55,24 @@ set mc1 1000000 set mj1 50 set mn1 300 set mw1 01:00:00 - +set access_err 0 print_header $test_id +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + # # Use sacctmgr to create a cluster # set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } -re "Adding Cluster" { incr amatches exp_continue @@ -105,10 +114,12 @@ expect { wait } } - +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} if {$amatches != 8} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters - got $amatches\n" + send_user "\nFAILURE: sacctmgr had a problem adding clusters got $amatches\n" set exit_code 1 } if { 
![check_acct_associations] } { diff --git a/testsuite/expect/test21.6 b/testsuite/expect/test21.6 index 7ebf96df6..3bfb62ac5 100755 --- a/testsuite/expect/test21.6 +++ b/testsuite/expect/test21.6 @@ -57,15 +57,24 @@ set mc1 1000000 set mj1 50 set mn1 300 set mw1 01:00:00 - +set access_err 0 print_header $test_id +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + # # Use sacctmgr to create a cluster # set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } -re "Adding Cluster" { incr amatches exp_continue @@ -124,7 +133,10 @@ expect { wait } } - +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} if {$amatches != 12} { send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" set exit_code 1 diff --git a/testsuite/expect/test21.7 b/testsuite/expect/test21.7 index 622967c51..0f464fdd1 100755 --- a/testsuite/expect/test21.7 +++ b/testsuite/expect/test21.7 @@ -57,15 +57,24 @@ set mc1 1000000 set mj1 50 set mn1 300 set mw1 01:00:00 - +set access_err 0 print_header $test_id +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + # # Use sacctmgr to create a cluster # set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } -re "Adding Cluster" { incr amatches exp_continue @@ -124,7 +133,10 @@ expect { wait } } - +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} if {$amatches != 12} { send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" set exit_code 1 diff --git a/testsuite/expect/test21.8 
b/testsuite/expect/test21.8 index 170044417..6d414415c 100755 --- a/testsuite/expect/test21.8 +++ b/testsuite/expect/test21.8 @@ -63,15 +63,24 @@ set mn1 300 set mn2 125 set mw1 01:00:00 set mw2 12:00:00 - +set access_err 0 print_header $test_id +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + # # Use sacctmgr to create a cluster # set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } -re "Adding Cluster" { incr amatches exp_continue @@ -130,7 +139,10 @@ expect { wait } } - +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} if {$amatches != 12} { send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" set exit_code 1 diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9 index 12c0dd911..4ec519450 100755 --- a/testsuite/expect/test21.9 +++ b/testsuite/expect/test21.9 @@ -63,15 +63,24 @@ set mn1 300 set mn2 125 set mw1 01:00:00 set mw2 12:00:00 - +set access_err 0 print_header $test_id +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + # # Use sacctmgr to create a cluster # set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { + -re "privilege to preform this action" { + set access_err 1 + exp_continue + } -re "Adding Cluster" { incr amatches exp_continue @@ -130,7 +139,10 @@ expect { wait } } - +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} if {$amatches != 12} { send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" set exit_code 1 diff --git a/testsuite/expect/test3.1 b/testsuite/expect/test3.1 index 10d286e32..34eaa662f 100755 --- 
a/testsuite/expect/test3.1 +++ b/testsuite/expect/test3.1 @@ -33,9 +33,42 @@ source ./globals set test_id "3.1" set exit_code 0 +set file_in "test$test_id.input" print_header $test_id +# +# Have a job running when the reconfigure happens +# +make_bash_script $file_in " + $srun $bin_sleep 60 +" +set job_id 0 +set sbatch_pid [spawn $sbatch -N1 --output=/dev/null -t2 $file_in] +expect { + -re "Submitted batch job ($number)" { + set job_id $expect_out(1,string) + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + catch {exec $bin_kill -KILL $sbatch_pid} + set exit_code 1 + } + eof { + wait + } +} +if {$job_id == 0} { + send_user "\nFAILURE: sbatch job submit failed\n" + exit 1 +} +if {[wait_for_job $job_id RUNNING] != 0} { + send_user "\nFAILURE: waiting for job $job_id to start\n" + cancel_job $job_id + exit 1 +} + # # Issue scontrol reconfigure # @@ -57,8 +90,14 @@ expect { wait } } +if {[wait_for_job $job_id RUNNING] != 0} { + send_user "\nFAILURE: job $job_id not running after reconfiguration\n" + set exit_code 1 +} +cancel_job $job_id if {$exit_code == 0} { + exec $bin_rm -f file_in send_user "\nSUCCESS\n" } exit $exit_code diff --git a/testsuite/expect/test3.2 b/testsuite/expect/test3.2 index 8696ca456..d354eb1c1 100755 --- a/testsuite/expect/test3.2 +++ b/testsuite/expect/test3.2 @@ -46,10 +46,10 @@ print_header $test_id # Execute a srun job in the specified partition name # Returns 0 on successful completion, returns 1 otherwise proc run_job { part_name } { - global bin_hostname srun timeout + global bin_printenv srun timeout set rc 1 - set srun_pid [spawn $srun --output=none --error=none -p $part_name -N1-128 -t1 $bin_hostname] + set srun_pid [spawn $srun --output=none --error=none -p $part_name -N1-128 -t1 $bin_printenv SLURMD_NODENAME] expect { -re "Unable to contact" { send_user "\nFAILURE: slurm appears to be down\n" @@ -81,7 +81,7 @@ proc run_job { part_name } { # spawn $scontrol show part expect { - -re 
"PartitionName=($alpha_numeric) " { + -re "PartitionName=($alpha_numeric_under) " { if {[string compare $part_name ""] == 0} { set part_name $expect_out(1,string) } @@ -152,7 +152,7 @@ if {$job_runable == 0} { # spawn $scontrol update PartitionName=$part_name State=$part_new_state expect { - -re "slurm_update error: ($alpha_numeric) ($alpha_numeric)" { + -re "slurm_update error: ($alpha_numeric_under) ($alpha_numeric_under)" { set access_err 0 set err_msg1 $expect_out(1,string) set err_msg2 $expect_out(2,string) diff --git a/testsuite/expect/test3.3 b/testsuite/expect/test3.3 index 9ccee66e7..47636f630 100755 --- a/testsuite/expect/test3.3 +++ b/testsuite/expect/test3.3 @@ -46,14 +46,14 @@ print_header $test_id # spawn $scontrol show node expect { - -re "NodeName=($alpha_numeric) State=IDLE " { + -re "NodeName=($alpha_numeric_under) State=IDLE " { if {[string compare $node_name ""] == 0} { set node_name $expect_out(1,string) set node_old_state "IDLE" } exp_continue } - -re "NodeName=($alpha_numeric) State=ALLOCATED " { + -re "NodeName=($alpha_numeric_under) State=ALLOCATED " { if {[string compare $node_name ""] == 0} { set node_name $expect_out(1,string) set node_old_state "ALLOCATED" @@ -82,7 +82,7 @@ if {[string compare $node_name ""] == 0} { # spawn $scontrol update NodeName=$node_name State=DRAIN Reason=TESTING expect { - -re "slurm_update error: ($alpha_numeric) ($alpha_numeric)" { + -re "slurm_update error: ($alpha_numeric_under) ($alpha_numeric_under)" { set access_err 0 set err_msg1 $expect_out(1,string) set err_msg2 $expect_out(2,string) diff --git a/testsuite/expect/test3.5 b/testsuite/expect/test3.5 index cdbff9e09..401d7352b 100755 --- a/testsuite/expect/test3.5 +++ b/testsuite/expect/test3.5 @@ -169,7 +169,7 @@ if {$allow != 1} { # spawn $bin_id -gn expect { - -re "($alpha_numeric)" { + -re "($alpha_numeric_under)" { set my_group $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test3.9 b/testsuite/expect/test3.9 index 
5f9cc1f4c..818d5b0b5 100755 --- a/testsuite/expect/test3.9 +++ b/testsuite/expect/test3.9 @@ -39,6 +39,7 @@ print_header $test_id # # Issue scontrol reconfigure # +set timeout $max_job_delay set matches 0 spawn $srun -N1 $scontrol show slurmd expect { diff --git a/testsuite/expect/test4.3 b/testsuite/expect/test4.3 index 43bd867f6..3943240cd 100755 --- a/testsuite/expect/test4.3 +++ b/testsuite/expect/test4.3 @@ -78,7 +78,7 @@ expect { incr matches -10 exp_continue } - -re "($end_of_line)($alpha_numeric)(\[ \*\]) *up" { + -re "($end_of_line)($alpha_numeric_under)(\[ \*\]) *up" { if (![string compare $expect_out(3,string) "*"]) { set def_part 1 set def_part_name $expect_out(2,string) diff --git a/testsuite/expect/test4.9 b/testsuite/expect/test4.9 index c8599cd04..8a58ca62d 100755 --- a/testsuite/expect/test4.9 +++ b/testsuite/expect/test4.9 @@ -47,7 +47,7 @@ print_header $test_id set format "--format=partition=%P node_cnt=%D" spawn $sinfo --noheader $format expect { - -re "partition=($alpha_numeric).* node_cnt=($number)" { + -re "partition=($alpha_numeric_under).* node_cnt=($number)" { incr matches exp_continue } @@ -73,7 +73,7 @@ set matches 0 set env(SINFO_FORMAT) "part=%P nodes=%D" spawn $sinfo --noheader expect { - -re "part=($alpha_numeric).* nodes=($number)" { + -re "part=($alpha_numeric_under).* nodes=($number)" { incr matches exp_continue } diff --git a/testsuite/expect/test6.10 b/testsuite/expect/test6.10 index ccbac2166..5d321322f 100755 --- a/testsuite/expect/test6.10 +++ b/testsuite/expect/test6.10 @@ -50,7 +50,7 @@ print_header $test_id set got_login 0 spawn $bin_id -un expect { - -re "($alpha_numeric)" { + -re "($alpha_numeric_under)" { set my_login $expect_out(1,string) set got_login 1 exp_continue @@ -100,7 +100,7 @@ exec $bin_rm -f $file_in set matches 0 spawn $scancel --interactive $job_id expect { - -re "Cancel job_id=$job_id .* partition=($alpha_numeric) .*\? " { + -re "Cancel job_id=$job_id .* partition=($alpha_numeric_under) .*\? 
" { set default_part $expect_out(1,string) send "n\n" exp_continue diff --git a/testsuite/expect/test6.13 b/testsuite/expect/test6.13 index 9f41d43af..a2945d5bf 100755 --- a/testsuite/expect/test6.13 +++ b/testsuite/expect/test6.13 @@ -46,7 +46,7 @@ print_header $test_id # of the ulimit program is inconsistent across systems. # exec rm -f $file_prog $file_in $file_out -exec cc -o $file_prog $file_prog.c +exec $bin_cc -O -o $file_prog ${file_prog}.c make_bash_script $file_in " $srun ./$file_prog " diff --git a/testsuite/expect/test6.8 b/testsuite/expect/test6.8 index ec2bb097f..2de2cbbbf 100755 --- a/testsuite/expect/test6.8 +++ b/testsuite/expect/test6.8 @@ -42,6 +42,12 @@ set job_id2 0 print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + # # Build input script file # diff --git a/testsuite/expect/test7.1 b/testsuite/expect/test7.1 index ab4923b0f..dc3a70220 100755 --- a/testsuite/expect/test7.1 +++ b/testsuite/expect/test7.1 @@ -42,6 +42,12 @@ set prio3 -1 print_header $test_id +if {[test_wiki_sched] == 1} { + send_user "\nWARNING: not compatable with sched/wiki (Maui)\n" + send_user " or sched/wiki2 (Moab) schedulers\n" + exit $exit_code +} + make_bash_script "pwd_script" { $bin_pwd } # @@ -54,7 +60,7 @@ expect { exp_continue } timeout { - send_user "\nFAILURE: srun not responding\n" + send_user "\nFAILURE: sbatch not responding\n" slow_kill $sbatch_pid exit 1 } @@ -69,7 +75,7 @@ expect { exp_continue } timeout { - send_user "\nFAILURE: srun not responding\n" + send_user "\nFAILURE: sbatch not responding\n" slow_kill $sbatch_pid exit 1 } diff --git a/testsuite/expect/test7.10 b/testsuite/expect/test7.10 index 322d58a98..61501f53a 100755 --- a/testsuite/expect/test7.10 +++ b/testsuite/expect/test7.10 @@ -56,7 +56,7 @@ exec $bin_echo "int getuid(void) { return ID; }" >>${ld_preload}.c exec $bin_echo "int geteuid(void) { return 
ID; }" >>${ld_preload}.c exec $bin_echo "int getgid(void) { return ID; }" >>${ld_preload}.c exec $bin_echo "int getegid(void) { return ID; }" >>${ld_preload}.c -exec $bin_cc -c -o ${ld_preload}.lo ${ld_preload}.c +exec $bin_cc -c -fPIC -o ${ld_preload}.lo ${ld_preload}.c exec $bin_cc -shared -o ${ld_preload}.so ${ld_preload}.lo global env diff --git a/testsuite/expect/test7.11 b/testsuite/expect/test7.11 new file mode 100755 index 000000000..196d75ce6 --- /dev/null +++ b/testsuite/expect/test7.11 @@ -0,0 +1,257 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# Test of SPANK plugin +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Morris Jette <jette1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "7.11" +set cwd "[$bin_pwd]" +set exit_code 0 +set file_in "${cwd}/test${test_id}.in" +set file_out "${cwd}/test${test_id}.out" +set file_prog "${cwd}/test${test_id}.prog" +set orig_spank_conf "${cwd}/test${test_id}.orig_conf" +set new_spank_conf "${cwd}/test${test_id}.new_conf" +set spank_out "${cwd}/test${test_id}.spank.out" + +print_header $test_id + +if {[test_super_user] == 0} { + send_user "\nWARNING: This test must be run as SlurmUser\n" + exit 0 +} +if {[test_aix] == 1} { + send_user "WARNING: Test is incompatible with AIX\n" + exit 0 +} + +# +# Build the plugin +# +exec $bin_rm -f ${file_prog}.so +exec $bin_cc -shared -I${slurm_dir}/include -o ${file_prog}.so ${file_prog}.c + +# +# Locate slurm.conf's directory, copy the original plugstack.conf file +# and create an updated one using our new plugin +# +log_user 0 +set config_dir "" +spawn $scontrol show config +expect { + -re "SLURM_CONFIG_FILE.*= (/.*)/slurm.conf" { + set config_dir $expect_out(1,string) + exp_continue + } + eof { + wait + } +} +log_user 1 +if {[string compare $config_dir ""] == 0} { + send_user "\nFAILURE: Could not locate slurm.conf directory\n" + exit 1 +} +set spank_conf_file ${config_dir}/plugstack.conf +exec $bin_rm -f $orig_spank_conf $new_spank_conf $file_out $spank_out +if {[file exists $spank_conf_file]} { + spawn $bin_cat $spank_conf_file + expect { + -re "test${test_id}" { + send_user "\nFAILURE: spank plugin includes vestigial test${test_id}\n" + send_user " You probably should manually remove it from $spank_conf_file.\n" + send_user " It was probably left over from some previous test failure.\n" + exit 1 + } + eof { + wait + } + } + + exec $bin_cp $spank_conf_file $orig_spank_conf + exec $bin_cp $spank_conf_file $new_spank_conf +} else { + exec $bin_cp /dev/null $new_spank_conf +} + +exec $bin_echo "required ${file_prog}.so ${spank_out}" >>$new_spank_conf +spawn 
$bin_cp $new_spank_conf $spank_conf_file +expect { + -re "Permission denied" { + send_user "\nWARNING: User lacks permission to update plugstack_conf file\n" + exit 0 + } + eof { + wait + } +} + +# +# Test of srun help message +# +# NOTE: Expect parsing failures have been noted running "$srun --help" +# directly, so we build a script containing a pipe to tail +# +make_bash_script $file_in "$srun --help | tail -n 20" +set matches 0 +set srun_pid [spawn $file_in] +expect { + -re "Component of slurm test suite" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + set exit_code 1 + } + eof { + wait + } +} +if {$matches != 1} { + send_user "\nFAILURE: spank help message not in srun help message\n" + set exit_code 1 +} + +# +# Test of locally logged messages(). +# We update the plugstatck.conf in this script since NFS delays may +# prevent it from having the same state as the submit host. +# +make_bash_script $file_in " + $bin_cp $new_spank_conf $spank_conf_file + $srun --test_suite=5 $bin_uname + if test -f $orig_spank_conf + then + $bin_cp $orig_spank_conf $spank_conf_file + fi +" +spawn $sbatch -N1 -t1 -o $file_out $file_in +expect { + -re timeout { + send_user "\nFAILURE: sbatch not responding\n" + set exit_code 1 + } + eof { + wait + } +} + +# NOTE: spank logs from sbatch and srun would be intermingled here +if {[wait_for_file $file_out] == 0} { + send_user "\n\n" + set match1 0 + set match2 0 + set match3 0 + spawn $bin_cat $file_out + expect { + -re "error" { + send_user "\nFAILURE: some error happened\n" + set exit_code 1 + exp_continue + } + -re "_test_opt_process: test_suite: opt_arg=5" { + set match1 1 + exp_continue + } + -re "slurm_spank_local_user_init" { + set match2 1 + exp_continue + } + -re "slurm_spank_exit: opt_arg=5" { + set match3 1 + exp_continue + } + eof { + wait + } + } + if {[expr $match1 + $match2 + $match3] != 3} { + send_user "\nFAILURE: local (srun) spank plugin failure\n" + 
set exit_code 1 + } else { + send_user "\nlocal (srun) spank plugin success\n" + } +} else { + set exit_code 1 +} + +# +# Test for slurmd output in file +# +if {[wait_for_file $spank_out] == 0} { + send_user "\n\n" + set matches 0 + spawn $bin_cat $spank_out + expect { + -re "slurm_spank_task_init: opt_arg=5" { + incr matches + exp_continue + } + -re "spank_get_item: my_uid=" { + incr matches + exp_continue + } + -re "slurm_spank_exit: opt_arg=5" { + incr matches + exp_continue + } + eof { + wait + } + } + if {$matches != 3} { + send_user "\nFAILURE: remote (slurmd) spank plugin failure\n" + set exit_code 1 + } else { + send_user "\n remote (slurmd) spank plugin success\n" + } +} else { + send_user "\nWARNING: This can be caused by the plugstack.conf file not\n" + send_user " being propagated to the compute node or not being in a\n" + send_user " shared file system.\n" + set exit_code 1 +} + +# +# Restore the original plugstack +# +if {[file exists $orig_spank_conf]} { + exec $bin_cp $orig_spank_conf $spank_conf_file +} else { + exec $bin_rm -f $spank_conf_file +} + +if {$exit_code == 0} { + exec $bin_rm -f $orig_spank_conf $new_spank_conf $file_out $spank_out ${file_prog}.so + send_user "\nSUCCESS\n" +} +exit $exit_code diff --git a/testsuite/expect/test7.11.prog.c b/testsuite/expect/test7.11.prog.c new file mode 100644 index 000000000..914960617 --- /dev/null +++ b/testsuite/expect/test7.11.prog.c @@ -0,0 +1,120 @@ +/*****************************************************************************\ + * prog7.11.prog.c - SPANK plugin for testing purposes + ***************************************************************************** + * Copyright (C) 2008 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Morris Jette <jette1@llnl.gov> + * LLNL-CODE-402394. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. 
+ * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/resource.h> +#include <sys/types.h> + +#include <slurm/spank.h> + +/* + * All spank plugins must define this macro for the SLURM plugin loader. 
+ */ +SPANK_PLUGIN(test_suite, 1); + +static int opt_arg = 0; +static char *opt_out_file = NULL; + +static int _test_opt_process(int val, const char *optarg, int remote); + +/* + * Provide a --renice=[prio] option to srun: + */ +struct spank_option spank_options[] = +{ + { "test_suite", "[opt_arg]", "Component of slurm test suite.", 2, 0, + _test_opt_process + }, + SPANK_OPTIONS_TABLE_END +}; + +static int _test_opt_process(int val, const char *optarg, int remote) +{ + opt_arg = atoi(optarg); + if (!remote) + slurm_info("_test_opt_process: test_suite: opt_arg=%d", opt_arg); + + return (0); +} + +/* Called from both srun and slurmd */ +int slurm_spank_init(spank_t sp, int ac, char **av) +{ + if (spank_remote(sp) && (ac == 1)) + opt_out_file = strdup(av[0]); + + return (0); +} + +/* Called from both srun and slurmd, not tested here +int slurm_spank_init_post_opt(spank_t sp, int ac, char **av) */ + +/* Called from srun only */ +slurm_spank_local_user_init(spank_t sp, int ac, char **av) +{ + slurm_info("slurm_spank_local_user_init"); + + return (0); +} + +/* Called from slurmd only */ +int slurm_spank_task_init(spank_t sp, int ac, char **av) +{ + uid_t my_uid; + + if (opt_out_file && opt_arg) { + FILE *fp = fopen(opt_out_file, "a"); + if (!fp) + return (-1); + fprintf(fp, "slurm_spank_task_init: opt_arg=%d\n", opt_arg); + if (spank_get_item(sp, S_JOB_UID, &my_uid) == ESPANK_SUCCESS) + fprintf(fp, "spank_get_item: my_uid=%d\n", my_uid); + fclose(fp); + } + return (0); +} + +/* Called from slurmd only, not tested here +int slurm_spank_task_post_fork(spank_t sp, int ac, char **av) */ + +/* Called from slurmd only, not tested here +int slurm_spank_task_exit(spank_t sp, int ac, char **av) */ + +/* Called from both srun and slurmd */ +int slurm_spank_exit(spank_t sp, int ac, char **av) +{ + if (opt_out_file && opt_arg) { + FILE *fp = fopen(opt_out_file, "a"); + if (!fp) + return (-1); + fprintf(fp, "slurm_spank_exit: opt_arg=%d\n", opt_arg); + fclose(fp); + } else if 
(opt_arg) + slurm_info("slurm_spank_exit: opt_arg=%d", opt_arg); + return (0); +} diff --git a/testsuite/expect/test7.3 b/testsuite/expect/test7.3 index 2fe04f5df..86fe053a9 100755 --- a/testsuite/expect/test7.3 +++ b/testsuite/expect/test7.3 @@ -50,16 +50,19 @@ if {[test_front_end] != 0} { # file delete $io_prog $test_prog -exec $bin_make -f /dev/null $io_prog +exec $bin_cc -O -o $io_prog ${io_prog}.c exec $bin_chmod 700 $io_prog send_user "slurm_dir is $slurm_dir\n" -if {![test_aix]} { - send_user "$bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib -L${slurm_dir}/lib -lslurm\n" - exec $bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib -L${slurm_dir}/lib -lslurm -} else { +if {[test_aix]} { send_user "$bin_cc ${test_prog}.c -Wl,-brtl -g -pthread -o ${test_prog} -I${slurm_dir}/include -L${slurm_dir}/lib -lslurm -lntbl\n" exec $bin_cc ${test_prog}.c -Wl,-brtl -g -pthread -o ${test_prog} -I${slurm_dir}/include -L${slurm_dir}/lib -lslurm -lntbl +} elseif [file exists ${slurm_dir}/lib64] { + send_user "$bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib64 -L${slurm_dir}/lib64 -lslurm\n" + exec $bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib64 -L${slurm_dir}/lib64 -lslurm +} else { + send_user "$bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib -L${slurm_dir}/lib -lslurm\n" + exec $bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib -L${slurm_dir}/lib -lslurm } exec $bin_chmod 700 $test_prog diff --git a/testsuite/expect/test7.7 b/testsuite/expect/test7.7 index c9e2969a5..c55a44bc1 100755 --- a/testsuite/expect/test7.7 +++ b/testsuite/expect/test7.7 @@ -52,7 +52,7 @@ set sched_wiki 0 exec $bin_rm -f $file_in $file_out $test_prog spawn $scontrol show config expect 
{ - -re "ControlAddr *= ($alpha_numeric)" { + -re "ControlAddr *= ($alpha_numeric_under)" { set control_addr $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test7.8 b/testsuite/expect/test7.8 index 17c66fa5e..0713a576b 100755 --- a/testsuite/expect/test7.8 +++ b/testsuite/expect/test7.8 @@ -52,7 +52,7 @@ set sched_wiki 0 exec $bin_rm -f $file_in $file_out $test_prog spawn $scontrol show config expect { - -re "ControlAddr *= ($alpha_numeric)" { + -re "ControlAddr *= ($alpha_numeric_under)" { set control_addr $expect_out(1,string) exp_continue } @@ -130,7 +130,7 @@ expect { } } if {$job_id1 == 0} { - send_user "\nFAILURE: srun job submit failed\n" + send_user "\nFAILURE: sbatch job submit failed\n" exit 1 } diff --git a/testsuite/expect/test7.9 b/testsuite/expect/test7.9 index e58d5243a..1915f4d2d 100755 --- a/testsuite/expect/test7.9 +++ b/testsuite/expect/test7.9 @@ -92,7 +92,7 @@ if {$invalid == 1} { # of the ulimit program is inconsistent across systems. # exec $bin_rm -f $file_prog $file_in $file_out -exec $bin_make -f /dev/null $file_prog +exec $bin_cc -O -o $file_prog ${file_prog}.c make_bash_script $file_in " $bin_echo 'testing within script' diff --git a/testsuite/expect/test8.7 b/testsuite/expect/test8.7 index 060a3598e..2f59d6b71 100755 --- a/testsuite/expect/test8.7 +++ b/testsuite/expect/test8.7 @@ -81,7 +81,7 @@ set sched_wiki 0 exec $bin_rm -f $file_in $test_prog spawn $scontrol show config expect { - -re "ControlAddr *= ($alpha_numeric)" { + -re "ControlAddr *= ($alpha_numeric_under)" { set control_addr $expect_out(1,string) exp_continue } diff --git a/testsuite/expect/test9.1 b/testsuite/expect/test9.1 index b2e1f3bb4..a567fd837 100755 --- a/testsuite/expect/test9.1 +++ b/testsuite/expect/test9.1 @@ -9,7 +9,8 @@ # Note: This script generates and then deletes files in the working directory # named test9.1.input and test9.1.output ############################################################################ -# Copyright (C) 2002 The 
Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. @@ -34,13 +35,18 @@ source ./globals set test_id "9.1" -set cycle_count 100 set exit_code 0 set file_in "test$test_id.input" set file_out "test$test_id.output" set job_name "test$test_id" set task_cnt $max_stress_tasks +if {[test_wiki_sched] == 1} { + set cycle_count 5 +} else { + set cycle_count 100 +} + if { [test_bluegene] } { set node_cnt 1-2048 } else { @@ -105,7 +111,11 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { set stdout_lines [get_line_cnt $file_out] } if {$stdout_lines != $stdout_target} { - send_user "\nFAILURE: stdout is incomplete\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set cycle_count [expr $inx + 1] set exit_code 1 break diff --git a/testsuite/expect/test9.2 b/testsuite/expect/test9.2 index 34a71af12..7d0ef733f 100755 --- a/testsuite/expect/test9.2 +++ b/testsuite/expect/test9.2 @@ -9,7 +9,8 @@ # Note: This script generates and then deletes files in the working directory # named test9.2.output ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. 
@@ -34,11 +35,16 @@ source ./globals set test_id "9.2" -set cycle_count 100 set exit_code 0 set file_out "test$test_id.output" set job_name "test$test_id" +if {[test_wiki_sched] == 1} { + set cycle_count 5 +} else { + set cycle_count 100 +} + if { [test_bluegene] } { set node_cnt 1-2048 } else { @@ -57,10 +63,10 @@ print_header $test_id # wait for completion # Returns 0 on successful completion, returns 1 otherwise proc run_hostname_job { output_file } { - global bin_hostname bin_rm job_name number srun node_cnt other_opts task_cnt timeout + global bin_printenv bin_rm job_name number srun node_cnt other_opts task_cnt timeout exec $bin_rm -f $output_file - set srun_pid [spawn $srun --job-name=$job_name --input=/dev/null --output=$output_file --error=/dev/null -n$task_cnt -N$node_cnt $other_opts -t1 $bin_hostname] + set srun_pid [spawn $srun --job-name=$job_name --input=/dev/null --output=$output_file --error=/dev/null -n$task_cnt -N$node_cnt $other_opts -t1 $bin_printenv SLURMD_NODENAME] expect { -re "Unable to contact" { send_user "\nFAILURE: slurm appears to be down\n" @@ -95,7 +101,11 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { set stdout_lines [get_line_cnt $file_out] } if {$stdout_lines != $task_cnt} { - send_user "\nFAILURE:stdout is incomplete\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set cycle_count [expr $inx + 1] set exit_code 1 break diff --git a/testsuite/expect/test9.3 b/testsuite/expect/test9.3 index 7a392520b..63b497653 100755 --- a/testsuite/expect/test9.3 +++ b/testsuite/expect/test9.3 @@ -9,7 +9,8 @@ # Note: This script generates and then deletes files in the working directory # named test9.3.output ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. 
+# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. @@ -34,12 +35,17 @@ source ./globals set test_id "9.3" -set cycle_count 100 set exit_code 0 set file_in "test$test_id.input" set file_out "test$test_id.output" set job_name "test$test_id" +if {[test_wiki_sched] == 1} { + set cycle_count 5 +} else { + set cycle_count 100 +} + if { [test_bluegene] } { set node_cnt 1-2048 } else { @@ -105,7 +111,11 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { set stdout_lines [get_line_cnt $file_out] } if {$stdout_lines != $stdout_target} { - send_user "\nFAILURE:stdout is incomplete\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set cycle_count [expr $inx + 1] set exit_code 1 break diff --git a/testsuite/expect/test9.4 b/testsuite/expect/test9.4 index 0b7d95750..46fc81406 100755 --- a/testsuite/expect/test9.4 +++ b/testsuite/expect/test9.4 @@ -10,10 +10,12 @@ # we create them ahead of time. Without explicity file creation, # this test requires about one minute per cycle to execute. # -# Note: This script generates and then deletes files in the working directory -# named test9.4.input, test9.4.[0-9]+.input, and test9.4.[0-9]+.output +# Note: This script generates and then deletes files in the working +# directory named test9.4.input, test9.4.[0-9]+.input, and +# test9.4.[0-9]+.output ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. 
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. @@ -38,13 +40,18 @@ source ./globals set test_id "9.4" -set cycle_count 100 set exit_code 0 set file_in "test$test_id.input" set file_in_task "test$test_id.%t.input" set file_out_task "test$test_id.%t.output" set job_name "test$test_id" +if {[test_wiki_sched] == 1} { + set cycle_count 5 +} else { + set cycle_count 100 +} + if { [test_bluegene] } { set node_cnt 1-2048 } else { @@ -116,10 +123,9 @@ expect { set success_cnt 0 set timeout $max_job_delay for {set inx 0} {$inx < $cycle_count} {incr inx} { -# Create empty output files (to avoid NFS delays) for {set tsk 0} {$tsk < $task_cnt} {incr tsk} { set file_out_glob "test$test_id.$tsk.output" - exec $bin_cp /dev/null $file_out_glob + exec $bin_rm -f $file_out_glob } if {[run_cat_job $file_in_task $file_out_task]} { set exit_code 1 @@ -135,7 +141,11 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { set stdout_lines [get_line_cnt $file_out_glob] } if {$stdout_lines != $stdin_lines} { - send_user "\nFAILURE:stdout is incomplete\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set exit_code 1 } else { incr success_cnt diff --git a/testsuite/expect/test9.5 b/testsuite/expect/test9.5 index fb8a46b9b..83004e005 100755 --- a/testsuite/expect/test9.5 +++ b/testsuite/expect/test9.5 @@ -6,7 +6,8 @@ # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. 
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. @@ -31,13 +32,18 @@ source ./globals set test_id "9.5" -set cycle_count 100 set exit_code 0 set file_in "test$test_id.input" set file_in_task "test$test_id.%t.input" set file_out "test$test_id.output" set job_name "test$test_id" +if {[test_wiki_sched] == 1} { + set cycle_count 5 +} else { + set cycle_count 100 +} + if { [test_bluegene] } { set node_cnt 1-2048 } else { @@ -119,7 +125,11 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { set stdout_lines [get_line_cnt $file_out] } if {$stdout_lines != $stdout_target} { - send_user "\nFAILURE:stdout is missing output\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set exit_code 1 } else { incr success_cnt diff --git a/testsuite/expect/test9.6 b/testsuite/expect/test9.6 index a057883f1..346e22f6c 100755 --- a/testsuite/expect/test9.6 +++ b/testsuite/expect/test9.6 @@ -6,10 +6,11 @@ # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. # -# Note: This script generates and then deletes files in the working directory +# Note: This script generates and then deletes files in the working directory # named test9.6.input and test9.6.output ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. 
@@ -34,12 +35,17 @@ source ./globals set test_id "9.6" -set cycle_count 100 set exit_code 0 set file_in "test$test_id.input" set file_out "test$test_id.output" set job_name "test$test_id" +if {[test_wiki_sched] == 1} { + set cycle_count 5 +} else { + set cycle_count 100 +} + if { [test_bluegene] } { set node_cnt 1-2048 } else { @@ -102,11 +108,15 @@ for {set inx 0} {$inx < $cycle_count} {incr inx} { } set stdout_lines [get_line_cnt $file_out] if {$stdout_lines != $stdout_target} { - exec $bin_sleep 1 + exec $bin_sleep 1 set stdout_lines [get_line_cnt $file_out] } if {$stdout_lines != $stdout_target} { - send_user "\nFAILURE:stdout is missing output\n" + if {$stdout_lines == 0} { + send_user "\nFAILURE: stdout is empty, is current working directory writable from compute nodes?\n" + } else { + send_user "\nFAILURE:stdout is incomplete\n" + } set cycle_count [expr $inx + 1] set exit_code 1 break diff --git a/testsuite/expect/test9.8 b/testsuite/expect/test9.8 index 494e51ab4..2f4129174 100755 --- a/testsuite/expect/test9.8 +++ b/testsuite/expect/test9.8 @@ -111,7 +111,7 @@ set user_name "" exec $bin_sleep [expr $delay + 6] spawn $bin_id -un expect { - -re "($alpha_numeric)" { + -re "($alpha_numeric_under)" { set user_name $expect_out(1,string) } eof { -- GitLab