From 9bb326fd7da7d8879ce48e110d665279dc6ef42b Mon Sep 17 00:00:00 2001 From: Mehdi Dogguy <mehdi@debian.org> Date: Mon, 8 Sep 2014 21:31:33 +0200 Subject: [PATCH] Imported Upstream version 1.3.6 --- META | 6 +- NEWS | 57 +- configure | 3 +- configure.ac | 3 +- contribs/perlapi/libslurm-perl/msg.h | 23 +- doc/html/accounting.shtml | 9 +- doc/html/configurator.html.in | 8 +- doc/html/cons_res.shtml | 220 +- doc/html/cons_res_share.shtml | 37 +- doc/html/footer.txt | 4 +- doc/html/gang_scheduling.shtml | 753 +++---- doc/html/header.txt | 4 +- doc/html/moab.shtml | 26 +- doc/html/preempt.shtml | 19 +- doc/html/quickstart.shtml | 27 +- doc/man/Makefile.am | 1 + doc/man/Makefile.in | 1 + doc/man/man1/sacctmgr.1 | 68 +- doc/man/man1/salloc.1 | 34 +- doc/man/man1/sbatch.1 | 33 +- doc/man/man1/srun.1 | 35 +- doc/man/man3/slurm_checkpoint_error.3 | 4 +- doc/man/man3/slurm_free_job_info_msg.3 | 21 +- doc/man/man3/slurm_load_job.3 | 1 + doc/man/man3/slurm_step_ctx_create.3 | 2 +- doc/man/man3/slurm_step_launch.3 | 49 +- doc/man/man5/slurm.conf.5 | 94 +- doc/man/man8/spank.8 | 16 +- slurm.spec | 13 +- slurm/slurm.h.in | 23 +- slurm/spank.h | 19 +- src/api/Makefile.am | 31 +- src/api/Makefile.in | 76 +- src/api/allocate.c | 7 +- src/api/config_info.c | 28 +- src/api/init_msg.c | 148 +- src/api/job_info.c | 58 +- src/api/step_ctx.c | 16 +- src/api/step_launch.c | 3 +- src/common/Makefile.am | 18 +- src/common/Makefile.in | 33 +- src/common/assoc_mgr.c | 107 +- src/common/assoc_mgr.h | 7 + src/common/jobacct_common.c | 9 +- src/common/jobacct_common.h | 35 +- src/common/pack.c | 50 +- src/common/pack.h | 4 +- src/common/plugin.c | 2 +- src/common/plugstack.c | 80 +- src/common/print_fields.c | 283 +-- src/common/print_fields.h | 22 +- src/common/read_config.c | 60 +- src/common/read_config.h | 4 +- src/common/slurm_accounting_storage.c | 766 +++++-- src/common/slurm_accounting_storage.h | 202 +- src/common/slurm_cred.c | 50 +- src/common/slurm_cred.h | 9 +- 
src/common/slurm_protocol_defs.c | 120 ++ src/common/slurm_protocol_defs.h | 14 +- src/common/slurm_protocol_pack.c | 2 + src/common/slurmdbd_defs.c | 340 ++- src/common/slurmdbd_defs.h | 44 +- src/common/stepd_api.c | 4 +- src/database/Makefile.am | 1 - src/database/mysql_common.h | 11 +- src/database/pgsql_common.h | 11 +- .../accounting_storage/filetxt/Makefile.am | 1 - .../filetxt/accounting_storage_filetxt.c | 87 +- .../filetxt/filetxt_jobacct_process.c | 208 +- .../filetxt/filetxt_jobacct_process.h | 5 +- .../gold/accounting_storage_gold.c | 219 +- .../accounting_storage/mysql/Makefile.am | 5 +- .../accounting_storage/mysql/Makefile.in | 10 +- .../mysql/accounting_storage_mysql.c | 1877 +++++++++++++---- .../mysql/mysql_jobacct_process.c | 69 +- .../mysql/mysql_jobacct_process.h | 11 +- .../accounting_storage/mysql/mysql_rollup.c | 18 +- .../none/accounting_storage_none.c | 24 + .../accounting_storage/pgsql/Makefile.am | 5 +- .../accounting_storage/pgsql/Makefile.in | 10 +- .../pgsql/accounting_storage_pgsql.c | 135 +- .../pgsql/pgsql_jobacct_process.c | 129 +- .../pgsql/pgsql_jobacct_process.h | 1 + .../slurmdbd/accounting_storage_slurmdbd.c | 205 +- src/plugins/checkpoint/xlch/Makefile.am | 4 - src/plugins/checkpoint/xlch/Makefile.in | 4 +- src/plugins/jobcomp/Makefile.am | 2 +- src/plugins/jobcomp/Makefile.in | 2 +- .../jobcomp/filetxt/filetxt_jobcomp_process.c | 8 +- src/plugins/jobcomp/script/jobcomp_script.c | 9 +- src/plugins/jobcomp/slurmdbd/Makefile.am | 13 - src/plugins/jobcomp/slurmdbd/Makefile.in | 555 ----- .../jobcomp/slurmdbd/jobcomp_slurmdbd.c | 139 -- src/plugins/mpi/mvapich/mvapich.c | 33 +- .../select/bluegene/plugin/Makefile.am | 4 + .../select/bluegene/plugin/Makefile.in | 4 + src/plugins/select/cons_res/dist_tasks.c | 18 +- src/plugins/select/cons_res/select_cons_res.c | 41 +- src/plugins/select/linear/select_linear.c | 119 +- src/sacct/Makefile.am | 7 +- src/sacct/Makefile.in | 11 +- src/sacct/options.c | 723 +++++-- src/sacct/sacct.c | 
14 +- src/sacct/sacct.h | 2 +- src/sacctmgr/Makefile.am | 14 +- src/sacctmgr/Makefile.in | 21 +- src/sacctmgr/account_functions.c | 509 +++-- src/sacctmgr/association_functions.c | 148 +- src/sacctmgr/cluster_functions.c | 199 +- src/sacctmgr/common.c | 334 ++- src/sacctmgr/file_functions.c | 447 ++-- src/sacctmgr/qos_functions.c | 414 ++++ src/sacctmgr/sacctmgr.c | 104 +- src/sacctmgr/sacctmgr.h | 18 +- src/sacctmgr/txn_functions.c | 242 +++ src/sacctmgr/user_functions.c | 650 +++--- src/salloc/Makefile.am | 3 +- src/salloc/Makefile.in | 7 +- src/salloc/opt.c | 39 +- src/salloc/opt.h | 2 +- src/salloc/salloc.c | 6 +- src/sattach/Makefile.am | 2 +- src/sattach/Makefile.in | 5 +- src/sbatch/Makefile.am | 3 +- src/sbatch/Makefile.in | 7 +- src/sbatch/opt.c | 41 +- src/sbatch/opt.h | 4 +- src/sbatch/sbatch.c | 4 +- src/sbcast/Makefile.am | 2 +- src/sbcast/Makefile.in | 4 +- src/scancel/Makefile.am | 2 +- src/scancel/Makefile.in | 4 +- src/scontrol/Makefile.am | 3 +- src/scontrol/Makefile.in | 7 +- src/scontrol/info_job.c | 39 +- src/scontrol/update_job.c | 11 +- src/sinfo/Makefile.am | 2 +- src/sinfo/Makefile.in | 4 +- src/slurmctld/job_mgr.c | 147 +- src/slurmctld/job_scheduler.c | 15 +- src/slurmctld/job_scheduler.h | 4 +- src/slurmctld/node_scheduler.c | 15 +- src/slurmctld/proc_req.c | 114 +- src/slurmctld/slurmctld.h | 25 +- src/slurmctld/srun_comm.c | 22 + src/slurmctld/srun_comm.h | 6 + src/slurmctld/step_mgr.c | 8 +- src/slurmd/slurmd/Makefile.am | 3 +- src/slurmd/slurmd/Makefile.in | 9 +- src/slurmd/slurmd/req.c | 105 +- src/slurmd/slurmstepd/Makefile.am | 4 +- src/slurmd/slurmstepd/Makefile.in | 8 +- src/slurmd/slurmstepd/mgr.c | 6 +- src/slurmd/slurmstepd/slurmstepd_job.c | 7 +- src/slurmd/slurmstepd/slurmstepd_job.h | 3 +- src/slurmd/slurmstepd/task.c | 2 +- src/slurmdbd/Makefile.am | 2 +- src/slurmdbd/Makefile.in | 4 +- src/slurmdbd/proc_req.c | 250 ++- src/smap/Makefile.am | 2 +- src/smap/Makefile.in | 4 +- src/squeue/Makefile.am | 2 +- 
src/squeue/Makefile.in | 4 +- src/squeue/print.c | 1 + src/squeue/sort.c | 2 + src/squeue/squeue.c | 35 +- src/sreport/Makefile.am | 6 +- src/sreport/Makefile.in | 10 +- src/sreport/cluster_reports.c | 63 +- src/sreport/common.c | 95 +- src/sreport/job_reports.c | 213 +- src/sreport/sreport.h | 3 +- src/sreport/user_reports.c | 64 +- src/srun/Makefile.am | 4 +- src/srun/Makefile.in | 9 +- src/srun/allocate.c | 37 +- src/srun/allocate.h | 5 +- src/srun/opt.c | 57 +- src/srun/opt.h | 4 +- src/srun/srun.c | 30 +- src/sstat/Makefile.am | 4 +- src/sstat/Makefile.in | 7 +- src/sstat/options.c | 226 +- src/sstat/sstat.c | 32 +- src/sstat/sstat.h | 2 +- src/strigger/Makefile.am | 2 +- src/strigger/Makefile.in | 4 +- src/sview/Makefile.am | 3 +- src/sview/Makefile.in | 4 +- src/sview/popups.c | 7 +- src/sview/sview.h | 1 + testsuite/expect/README | 7 +- testsuite/expect/globals | 66 + testsuite/expect/test1.23 | 2 +- testsuite/expect/test1.35 | 22 +- testsuite/expect/test1.38 | 15 +- testsuite/expect/test1.49 | 2 +- testsuite/expect/test1.7 | 12 +- testsuite/expect/test1.87 | 2 +- testsuite/expect/test1.91 | 37 +- testsuite/expect/test15.7 | 2 +- testsuite/expect/test17.10 | 2 +- testsuite/expect/test19.3 | 7 +- testsuite/expect/test19.4 | 7 +- testsuite/expect/test19.5 | 7 +- testsuite/expect/test19.6 | 7 +- testsuite/expect/test19.7 | 7 +- testsuite/expect/test21.5 | 98 +- testsuite/expect/test21.6 | 103 +- testsuite/expect/test21.7 | 225 ++ testsuite/expect/test21.8 | 292 +++ testsuite/expect/test21.9 | 292 +++ testsuite/expect/test5.6 | 2 +- 213 files changed, 10524 insertions(+), 4693 deletions(-) create mode 100644 doc/man/man3/slurm_load_job.3 delete mode 100644 src/plugins/jobcomp/slurmdbd/Makefile.am delete mode 100644 src/plugins/jobcomp/slurmdbd/Makefile.in delete mode 100644 src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c create mode 100644 src/sacctmgr/qos_functions.c create mode 100644 src/sacctmgr/txn_functions.c create mode 100755 testsuite/expect/test21.7 
create mode 100755 testsuite/expect/test21.8 create mode 100755 testsuite/expect/test21.9 diff --git a/META b/META index b59079a2b..b498c1bb5 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 5 + Micro: 6 Minor: 3 Name: slurm Release: 1 - Release_tags: dist - Version: 1.3.5 + Release_tags: + Version: 1.3.6 diff --git a/NEWS b/NEWS index bb862d7c5..da0360eb0 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,35 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 1.3.6 +======================== + -- Add new function to get information for a single job rather than always + getting information for all jobs. Improved performance of some commands. + NOTE: This new RPC means that the slurmctld daemons should be updated + before or at the same time as the compute nodes in order to process it. + -- In salloc, sbatch, and srun replace --task-mem options with --mem-per-cpu + (--task-mem will continue to be accepted for now, but is not documented). + Replace DefMemPerTask and MaxMemPerTask with DefMemPerCPU, DefMemPerNode, + MaxMemPerCPU and MaxMemPerNode in slurm.conf (old options still accepted + for now, but mapped to "PerCPU" parameters and not documented). Allocate + a job's memory at the same time that processors are allocated based + upon the --mem or --mem-per-cpu option rather than when job steps are + initiated. + -- Altered QOS in accounting to be a list of admin defined states, an + account or user can have multiple QOS's now. They need to be defined using + 'sacctmgr add qos'. They are no longer an enum. If none are defined + Normal will be the QOS for everything. Right now this is only for use + with MOAB. Does nothing outside of that. + -- Added spank_get_item support for field S_STEP_CPUS_PER_TASK. + -- Make corrections in spank_get_item for field S_JOB_NCPUS, previously + reported task count rather than CPU count.
+ -- Convert configuration parameter PrivateData from on/off flag to have + separate flags for job, partition, and node data. See "man slurm.conf" + for details. + -- Fix bug, failed to load DisableRootJobs configuration parameter. + -- Altered sacctmgr to always return a non-zero exit code on error and send + error messages to stderr. + * Changes in SLURM 1.3.5 ======================== -- Fix processing of auth/munge authtentication key for messages originating @@ -13,8 +42,9 @@ documents those changes that are of interest to users and admins. -- Added more support for "dumping" account information to a flat file and read in again to protect data incase something bad happens to the database. -- Sacct will now report account names for job steps. - -- For AIX: Remove MP_POERESTART_ENV environment variable, disabling poerestart - command. User must explicitly set MP_POERESTART_ENV before executing poerestart. + -- For AIX: Remove MP_POERESTART_ENV environment variable, disabling + poerestart command. User must explicitly set MP_POERESTART_ENV before + executing poerestart. -- Put back notification that a job has been allocated resources when it was pending. @@ -373,21 +403,32 @@ documents those changes that are of interest to users and admins. Moved existing digital signature logic into new plugin: crypto/openssl. Added new support for crypto/munge (available with GPL license). +* Changes in SLURM 1.2.33 +========================= + -- Cancelled or Failed jobs will now report their job and step id on exit + -- Add SPANK items available to get: SLURM_VERSION, SLURM_VERSION_MAJOR, + SLURM_VERSION_MINOR and SLURM_VERSION_MICRO. + -- Fixed handling of SIGPIPE in srun. Abort job. + -- Fix bug introduced to MVAPICH plugin preventing use of TotalView debugger. + -- Modify slurmctld to get srun/salloc network address based upon the incoming + message rather than hostname set by the user command (backport of logic in + SLURM v1.3).
+ * Changes in SLURM 1.2.32 ========================= - -- Disable scancel of job in RootOnly partition only for sched/wiki2 (Moab). - Permit user scancel from other configurations (e.g. LSF). + -- LSF only: Enable scancel of job in RootOnly partition by the job's owner. -- Add support for sbatch --distribution and --network options. -- Correct pending job's wait reason to "Priority" rather than "Resources" if required resources are being held in reserve for a higher priority job. - -- In sched/wiki2 (Moab) report a node's state as "Drained" rather than "Draining" - if it has no allocated work (An undocumented Moab wiki option, see CRI - ticket #2394). + -- In sched/wiki2 (Moab) report a node's state as "Drained" rather than + "Draining" if it has no allocated work (An undocumented Moab wiki option, + see CRI ticket #2394). -- Log to job's output when it is cancelled or reaches it's time limit (ported from existing code in slurm v1.3). -- Add support in salloc and sbatch commands for --network option. -- Add support for user environment variables that include '\n' (e.g. bash functions). + -- Partial rewrite of mpi/mvapich plugin for improved scalability. * Changes in SLURM 1.2.31 ========================= @@ -3328,4 +3369,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. 
-$Id: NEWS 14453 2008-07-08 20:26:18Z da $ +$Id: NEWS 14583 2008-07-21 17:18:13Z da $ diff --git a/configure b/configure index fe472cf3a..785c74f70 100755 --- a/configure +++ b/configure @@ -27196,7 +27196,7 @@ _ACEOF -ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 src/Makefile src/api/Makefile src/common/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/gold/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile 
src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/jobcomp/slurmdbd/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile" +ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 src/Makefile src/api/Makefile src/common/Makefile src/database/Makefile 
src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/gold/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile 
src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile" cat >confcache <<\_ACEOF @@ -27976,7 +27976,6 @@ do "src/plugins/jobcomp/script/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/script/Makefile" ;; "src/plugins/jobcomp/mysql/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/mysql/Makefile" ;; "src/plugins/jobcomp/pgsql/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/pgsql/Makefile" ;; - "src/plugins/jobcomp/slurmdbd/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/slurmdbd/Makefile" ;; "src/plugins/proctrack/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/Makefile" ;; "src/plugins/proctrack/aix/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/aix/Makefile" ;; "src/plugins/proctrack/pgid/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/pgid/Makefile" ;; diff --git a/configure.ac b/configure.ac index 56782ee48..d2c076132 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 
+1,4 @@ -# $Id: configure.ac 14147 2008-05-28 22:46:41Z da $ +# $Id: configure.ac 14598 2008-07-21 20:12:45Z da $ # This file is to be processed with autoconf to generate a configure script dnl Prologue @@ -333,7 +333,6 @@ AC_CONFIG_FILES([Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile - src/plugins/jobcomp/slurmdbd/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile diff --git a/contribs/perlapi/libslurm-perl/msg.h b/contribs/perlapi/libslurm-perl/msg.h index 23c17b34e..2f36413ff 100644 --- a/contribs/perlapi/libslurm-perl/msg.h +++ b/contribs/perlapi/libslurm-perl/msg.h @@ -57,9 +57,12 @@ inline static int av_store_int(AV* av, int index, int val) */ inline static int hv_store_charp(HV* hv, const char *key, charp val) { - SV* sv = newSVpv(val, 0); - - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + SV* sv = NULL; + + if(val) + sv = newSVpv(val, 0); + + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -73,7 +76,7 @@ inline static int hv_store_uint32_t(HV* hv, const char *key, uint32_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -87,7 +90,7 @@ inline static int hv_store_uint16_t(HV* hv, const char *key, uint16_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -101,7 +104,7 @@ inline static int hv_store_uint8_t(HV* hv, const char *key, uint8_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -114,7 +117,7 @@ inline static int hv_store_int(HV* hv, const 
char *key, int val) { SV* sv = newSViv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -126,7 +129,7 @@ inline static int hv_store_int(HV* hv, const char *key, int val) */ inline static int hv_store_bool(HV* hv, const char *key, bool val) { - if (hv_store(hv, key, (I32)strlen(key), (val ? &PL_sv_yes : &PL_sv_no), 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), (val ? &PL_sv_yes : &PL_sv_no), 0) == NULL) { return -1; } return 0; @@ -139,7 +142,7 @@ inline static int hv_store_time_t(HV* hv, const char *key, time_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -151,7 +154,7 @@ inline static int hv_store_time_t(HV* hv, const char *key, time_t val) */ inline static int hv_store_sv(HV* hv, const char *key, SV* sv) { - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { return -1; } return 0; diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index 6a2138ab8..657db1605 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -45,7 +45,7 @@ or SlurmDBD for added security.</li> </ul> <p>The use of sacct or sstat to view information about completed jobs -is dependent upon both JobAcctGatherType and JobAcctGatherType +is dependent upon both JobAcctGatherType and AccountingStorageType being configured to collect and store that information. The use of sreport is dependent upon some database being used to store that information.</p> @@ -59,7 +59,7 @@ pathname of the file (e.g. <i>JobCompLoc=/var/log/slurm/job_completions</i>). Use the <i>logrotate</i> or similar tool to prevent the log files from getting too large. 
-Send a SIGHUP signal to the <i>slurmctld</i> deaemon +Send a SIGHUP signal to the <i>slurmctld</i> daemon after moving the files, but before compressing them so that new log files will be created.</p> @@ -253,7 +253,7 @@ warning message.</li> The port number that the Slurm Database Daemon (slurmdbd) listens to for work. The default value is SLURMDBD_PORT as established at system build time. If none is explicitly specified, it will be set to 6819. -This value must be equal to the <i>SlurmDbdPort</i> parameter in the +This value must be equal to the <i>AccountingStoragePort</i> parameter in the slurm.conf file.</li> <li><b>LogFile</b>: @@ -566,4 +566,5 @@ completely. This is meant to clean up after typographic errors.</p> <p style="text-align: center;">Last modified 27 June 2008</p> -</ul></body></html> +<!--#include virtual="footer.txt"--> + diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in index 2b61e0922..a10f6f157 100644 --- a/doc/html/configurator.html.in +++ b/doc/html/configurator.html.in @@ -161,7 +161,7 @@ function displayfile() "MpiDefault=" + get_radio_value(document.config.mpi_default) + "<br>" + "#PluginDir= <br>" + "#PlugStackConfig= <br>" + - "#PrivateData=0 <br>" + + "#PrivateData=jobs <br>" + "ProctrackType=proctrack/" + get_radio_value(document.config.proctrack_type) + "<br>" + get_field("Prolog",document.config.prolog) + "<br>" + "#PropagatePrioProcess=0 <br>" + @@ -206,9 +206,9 @@ function displayfile() "# <br>" + "# <br>" + "# SCHEDULING <br>" + - "#DefMemPerTask=0 <br>" + + "#DefMemPerCPU=0 <br>" + "FastSchedule=" + get_radio_value(document.config.fast_schedule) + "<br>" + - "#MaxMemPerTask=0 <br>" + + "#MaxMemPerCPU=0 <br>" + "#SchedulerRootFilter=1 <br>" + "#SchedulerTimeSlice=30 <br>" + "SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" + @@ -812,6 +812,6 @@ before terminating all remaining tasks. 
A value of zero indicates unlimited wait </FORM> <HR> <P class="footer">LLNL-WEB-402631<BR> -Last modified 1 April 2008</P> +Last modified 17 July 2008</P> </BODY> diff --git a/doc/html/cons_res.shtml b/doc/html/cons_res.shtml index ae4f02293..368810a9e 100644 --- a/doc/html/cons_res.shtml +++ b/doc/html/cons_res.shtml @@ -28,15 +28,15 @@ this plug-in is described below. slurm.conf (e.g. <i>SelectType=select/cons_res</i>).</li> <pre> # -# "SelectType" : node selection logic for scheduling. -# "select/bluegene" : the default on BlueGene systems, aware of -# system topology, manages bglblocks, etc. -# "select/cons_res" : allocate individual consumable resources -# (i.e. processors, memory, etc.) -# "select/linear" : the default on non-BlueGene systems, -# no topology awareness, oriented toward -# allocating nodes to jobs rather than -# resources within a node (e.g. CPUs) +# "SelectType" : node selection logic for scheduling. +# "select/bluegene" : the default on BlueGene systems, aware of +# system topology, manages bglblocks, etc. +# "select/cons_res" : allocate individual consumable resources +# (i.e. processors, memory, etc.) +# "select/linear" : the default on non-BlueGene systems, +# no topology awareness, oriented toward +# allocating nodes to jobs rather than +# resources within a node (e.g. CPUs) # # SelectType=select/linear SelectType=select/cons_res @@ -98,15 +98,15 @@ SelectType=select/cons_res SelectTypeParameter in the slurm.conf.</li> <pre> # -# "SelectType" : node selection logic for scheduling. -# "select/bluegene" : the default on BlueGene systems, aware of -# system topology, manages bglblocks, etc. -# "select/cons_res" : allocate individual consumable resources -# (i.e. processors, memory, etc.) -# "select/linear" : the default on non-BlueGene systems, -# no topology awareness, oriented toward -# allocating nodes to jobs rather than -# resources within a node (e.g. CPUs) +# "SelectType" : node selection logic for scheduling. 
+# "select/bluegene" : the default on BlueGene systems, aware of +# system topology, manages bglblocks, etc. +# "select/cons_res" : allocate individual consumable resources +# (i.e. processors, memory, etc.) +# "select/linear" : the default on non-BlueGene systems, +# no topology awareness, oriented toward +# allocating nodes to jobs rather than +# resources within a node (e.g. CPUs) # # SelectType=select/linear SelectType=select/cons_res @@ -115,34 +115,33 @@ SelectType=select/cons_res # - select/bluegene - this parameter is currently ignored # - select/linear - this parameter is currently ignored # - select/cons_res - the parameters available are -# - CR_CPU (1) - CPUs as consumable resources. -# No notion of sockets, cores, or threads. -# On a multi-core system CPUs will be cores -# On a multi-core/hyperthread system CPUs will -# be threads -# On a single-core systems CPUs are CPUs. ;-) -# - CR_Socket (2) - Sockets as a consumable resource. -# - CR_Core (3) - Cores as a consumable resource. -# (Not yet implemented) -# - CR_Memory (4) - Memory as a consumable resource. -# Note! CR_Memory assumes Shared=Yes -# - CR_Socket_Memory (5) - Socket and Memory as consumable -# resources. -# - CR_Core_Memory (6) - Core and Memory as consumable -# resources. (Not yet implemented) -# - CR_CPU_Memory (7) - CPU and Memory as consumable -# resources. +# - CR_CPU (1) - CPUs as consumable resources. +# No notion of sockets, cores, or threads. +# On a multi-core system CPUs will be cores +# On a multi-core/hyperthread system CPUs +# will be threads +# On a single-core systems CPUs are CPUs. +# - CR_Socket (2) - Sockets as a consumable resource. +# - CR_Core (3) - Cores as a consumable resource. +# - CR_Memory (4) - Memory as a consumable resource. +# Note! CR_Memory assumes Shared=Yes +# - CR_Socket_Memory (5) - Socket and Memory as consumable +# resources. +# - CR_Core_Memory (6) - Core and Memory as consumable +# resources. 
(Not yet implemented) +# - CR_CPU_Memory (7) - CPU and Memory as consumable +# resources. # # (#) refer to the output of "scontrol show config" # -# NB!: The -E extension for sockets, cores, and threads -# are ignored within the node allocation mechanism -# when CR_CPU or CR_CPU_MEMORY is selected. -# They are considered to compute the total number of -# tasks when -n is not specified +# NB!: The -E extension for sockets, cores, and threads +# are ignored within the node allocation mechanism +# when CR_CPU or CR_CPU_MEMORY is selected. +# They are considered to compute the total number of +# tasks when -n is not specified # # NB! All CR_s assume Shared=No or Shared=Force EXCEPT for -# CR_MEMORY which assumes Shared=Yes +# CR_MEMORY which assumes Shared=Yes # #SelectTypeParameters=CR_CPU (default) </pre> @@ -169,7 +168,7 @@ SelectType=select/cons_res way as when using the default node selection scheme.</li> <li>The <i>--exclusive</i> srun switch allows users to request nodes in exclusive mode even when consumable resources is enabled. see "man srun" - for details. </li> + for details. </li> <li>srun's <i>-s</i> or <i>--share</i> is incompatible with the consumable resource environment and will therefore not be honored. 
Since in this environment nodes are shared by default, <i>--exclusive</i> allows users to obtain dedicated nodes.</li> @@ -213,19 +212,18 @@ Please send comments and requests about the consumable resources to # srun sleep 100 & # srun sleep 100 & # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1132 allNodes sleep sballe R 0:05 1 hydra12 - 1133 allNodes sleep sballe R 0:04 1 hydra12 - 1134 allNodes sleep sballe R 0:02 1 hydra12 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1132 allNodes sleep sballe R 0:05 1 hydra12 + 1133 allNodes sleep sballe R 0:04 1 hydra12 + 1134 allNodes sleep sballe R 0:02 1 hydra12 # srun -N 2-2 -E 2:2 sleep 100 & srun: job 1135 queued and waiting for resources #squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1135 allNodes sleep sballe PD 0:00 2 (Resources) - 1132 allNodes sleep sballe R 0:24 1 hydra12 - 1133 allNodes sleep sballe R 0:23 1 hydra12 - 1134 allNodes sleep sballe R 0:21 1 hydra12 -# +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1135 allNodes sleep sballe PD 0:00 2 (Resources) + 1132 allNodes sleep sballe R 0:24 1 hydra12 + 1133 allNodes sleep sballe R 0:23 1 hydra12 + 1134 allNodes sleep sballe R 0:21 1 hydra12 </pre> <li><b>Proposed solution:</b> Enhance the selection mechanism to go through {node,socket,core,thread}-tuplets to find available match for specific request (bounded knapsack problem). </li> </ul> @@ -248,7 +246,7 @@ srun: job 1135 queued and waiting for resources <h2>Examples of CR_Memory, CR_Socket_Memory, and CR_CPU_Memory type consumable resources</h2> <pre> -sinfo -lNe +# sinfo -lNe NODELIST NODES PARTITION STATE CPUS S:C:T MEMORY hydra[12-16] 5 allNodes* ... 4 2:2:1 2007 </pre> @@ -256,59 +254,59 @@ hydra[12-16] 5 allNodes* ... 
4 2:2:1 2007 <p>Using select/cons_res plug-in with CR_Memory</p> <pre> Example: -srun -N 5 -n 20 --job-mem=1000 sleep 100 & <-- running -srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running -srun -N 5 -n 10 --job-mem=1000 sleep 100 & <-- queued and waiting for resources - -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1820 allNodes sleep sballe PD 0:00 5 (Resources) - 1818 allNodes sleep sballe R 0:17 5 hydra[12-16] - 1819 allNodes sleep sballe R 0:11 5 hydra[12-16] +# srun -N 5 -n 20 --job-mem=1000 sleep 100 & <-- running +# srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running +# srun -N 5 -n 10 --job-mem=1000 sleep 100 & <-- queued and waiting for resources + +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1820 allNodes sleep sballe PD 0:00 5 (Resources) + 1818 allNodes sleep sballe R 0:17 5 hydra[12-16] + 1819 allNodes sleep sballe R 0:11 5 hydra[12-16] </pre> <p>Using select/cons_res plug-in with CR_Socket_Memory (2 sockets/node)</p> <pre> Example 1: -srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running -srun -n 1 -w hydra12 --job-mem=2000 sleep 100 & <-- queued and waiting for resources +# srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running +# srun -n 1 -w hydra12 --job-mem=2000 sleep 100 & <-- queued and waiting for resources -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1890 allNodes sleep sballe PD 0:00 1 (Resources) - 1889 allNodes sleep sballe R 0:08 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1890 allNodes sleep sballe PD 0:00 1 (Resources) + 1889 allNodes sleep sballe R 0:08 5 hydra[12-16] Example 2: -srun -N 5 -n 10 --job-mem=10 sleep 100 & <-- running -srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resourcessqueue +# srun -N 5 -n 10 --job-mem=10 sleep 100 & <-- running +# srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resourcessqueue -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1831 
allNodes sleep sballe PD 0:00 1 (Resources) - 1830 allNodes sleep sballe R 0:07 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1831 allNodes sleep sballe PD 0:00 1 (Resources) + 1830 allNodes sleep sballe R 0:07 5 hydra[12-16] </pre> <p>Using select/cons_res plug-in with CR_CPU_Memory (4 CPUs/node)</p> <pre> Example 1: -srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running -srun -N 5 -n 5 --job-mem=10 sleep 100 & <-- running -srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- queued and waiting for resources +# srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running +# srun -N 5 -n 5 --job-mem=10 sleep 100 & <-- running +# srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- queued and waiting for resources -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1835 allNodes sleep sballe PD 0:00 5 (Resources) - 1833 allNodes sleep sballe R 0:10 5 hydra[12-16] - 1834 allNodes sleep sballe R 0:07 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1835 allNodes sleep sballe PD 0:00 5 (Resources) + 1833 allNodes sleep sballe R 0:10 5 hydra[12-16] + 1834 allNodes sleep sballe R 0:07 5 hydra[12-16] Example 2: -srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running -srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resources +# srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running +# srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resources -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1837 allNodes sleep sballe PD 0:00 1 (Resources) - 1836 allNodes sleep sballe R 0:11 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1837 allNodes sleep sballe PD 0:00 1 (Resources) + 1836 allNodes sleep sballe R 0:11 5 hydra[12-16] </pre> <p class="footer"><a href="#top">top</a></p> @@ -365,11 +363,11 @@ have one idle cpu and linux04 has 3 idle cpus.</p> <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 3 lsf 
sleep root PD 0:00 3 (Resources) - 4 lsf sleep root PD 0:00 1 (Resources) - 5 lsf sleep root PD 0:00 1 (Resources) - 2 lsf sleep root R 0:14 4 xc14n[13-16] +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 3 lsf sleep root PD 0:00 3 (Resources) + 4 lsf sleep root PD 0:00 1 (Resources) + 5 lsf sleep root PD 0:00 1 (Resources) + 2 lsf sleep root R 0:14 4 xc14n[13-16] </pre> <p>Once Job 2 is finished, Job 3 is scheduled and runs on @@ -381,10 +379,10 @@ and Job 4 can run concurrently on the cluster.</p> <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 5 lsf sleep root PD 0:00 1 (Resources) - 3 lsf sleep root R 0:11 3 xc14n[13-15] - 4 lsf sleep root R 0:11 1 xc14n16 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 5 lsf sleep root PD 0:00 1 (Resources) + 3 lsf sleep root R 0:11 3 xc14n[13-15] + 4 lsf sleep root R 0:11 1 xc14n16 </pre> <p>Once Job 3 finishes, Job 5 is allocated resources and can run.</p> @@ -426,16 +424,16 @@ nodes) and Job 4 is scheduled onto one of the remaining idle cpus on Linux04.</p <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 5 lsf sleep root PD 0:00 1 (Resources) - 2 lsf sleep root R 0:13 4 linux[01-04] - 3 lsf sleep root R 0:09 3 linux[01-03] - 4 lsf sleep root R 0:05 1 linux04 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 5 lsf sleep root PD 0:00 1 (Resources) + 2 lsf sleep root R 0:13 4 linux[01-04] + 3 lsf sleep root R 0:09 3 linux[01-03] + 4 lsf sleep root R 0:05 1 linux04 # sinfo -lNe NODELIST NODES PARTITION STATE CPUS MEMORY TMP_DISK WEIGHT FEATURES REASON -linux[01-03] 3 lsf* allocated 2 2981 1 1 (null) none -linux04 1 lsf* allocated 4 3813 1 1 (null) none +linux[01-03] 3 lsf* allocated 2 2981 1 1 (null) none +linux04 1 lsf* allocated 4 3813 1 1 (null) none </pre> <p>Once Job 2 finishes, Job 5, which was pending, is allocated available resources and is then @@ -443,10 +441,10 @@ running as illustrated below:</p> <pre> # squeue - JOBID PARTITION 
NAME USER ST TIME NODES NODELIST(REASON) - 3 lsf sleep root R 1:58 3 linux[01-03] - 4 lsf sleep root R 1:54 1 linux04 - 5 lsf sleep root R 0:02 3 linux[01-03] +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 3 lsf sleep root R 1:58 3 linux[01-03] + 4 lsf sleep root R 1:54 1 linux04 + 5 lsf sleep root R 0:02 3 linux[01-03] # sinfo -lNe NODELIST NODES PARTITION STATE CPUS MEMORY TMP_DISK WEIGHT FEATURES REASON linux[01-03] 3 lsf* allocated 2 2981 1 1 (null) none @@ -457,8 +455,8 @@ linux04 1 lsf* idle 4 3813 1 1 (null) no <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 5 lsf sleep root R 1:52 3 linux[01-03] +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 5 lsf sleep root R 1:52 3 linux[01-03] </pre> <p>Job 3 and Job 4 have finshed and Job 5 is still running on nodes linux[01-03].</p> @@ -480,6 +478,6 @@ one mpi process per node.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 25 September 2006</p> +<p style="text-align:center;">Last modified 8 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/cons_res_share.shtml b/doc/html/cons_res_share.shtml index 66715e41d..2221f4a2e 100644 --- a/doc/html/cons_res_share.shtml +++ b/doc/html/cons_res_share.shtml @@ -173,7 +173,9 @@ to begin running "on top of" the existing jobs. This happens with the <H3>Memory Management</H3> <P> -The management of memory as a consumable resource remains unchanged: +The management of memory as a consumable resource remains unchanged and +can be used to prevent oversubscription of memory, which would result in +having memory pages swapped out and severely degraded performance. 
</P> <TABLE CELLPADDING=3 CELLSPACING=1 BORDER=1> <TR><TH>Selection Setting</TH> @@ -202,21 +204,28 @@ available memory to meet the job's memory requirement will not be allocated to the job.</TD> </TR> </TABLE> -<P>Note that the <CODE>srun --mem=<num></CODE> option is only used to -request nodes that have <num> amount of real memory. This option does not -compute memory that is currently available. -</P><P> -The <CODE>srun --job-mem=<num></CODE> option is used with the -<CODE>select/cons_res</CODE> plugin to request available memory from each node. -</P><P> -The <CODE>select/cons_res</CODE> plugin tracks memory usage by each job on each -node regardless of the number partitions a node may be assigned to. The primary -purpose of tracking memory as a consumable resource is to protect jobs from -having their memory pages swapped out because the memory has been overcommitted. -</P> +<P>Users can specify their job's memory requirements in one of two ways. +<CODE>--mem=<num></CODE> can be used to specify the job's memory +requirement on a per allocated node basis. This option is probably best +suited for use with the <CODE>select/linear</CODE> plugin, which allocates +whole nodes to jobs. +<CODE>--mem-per-cpu=<num></CODE> can be used to specify the job's +memory requirement on a per allocated CPU basis. This is probably best +suited for use with the <CODE>select/cons_res</CODE> plugin which can +allocate individual CPUs to jobs.</P> + +<P>Default and maximum values for memory on a per node or per CPU basis can +be configured using the following options: <CODE>DefMemPerCPU</CODE>, +<CODE>DefMemPerNode</CODE>, <CODE>MaxMemPerCPU</CODE> and <CODE>MaxMemPerNode</CODE>. +Users can use the <CODE>--mem</CODE> or <CODE>--mem-per-cpu</CODE> option +at job submission time to specify their memory requirements. +Enforcement of a job's memory allocation is performed by the accounting +plugin, which periodically gathers data about running jobs. 
Set +<CODE>JobAcctGather</CODE> and <CODE>JobAcctFrequency</CODE> to +values suitable for your system.</P> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 27 May 2008</p> +<p style="text-align:center;">Last modified 8 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/footer.txt b/doc/html/footer.txt index 49b3c490e..dbed09229 100644 --- a/doc/html/footer.txt +++ b/doc/html/footer.txt @@ -1,8 +1,8 @@ </div> <!-- closes "content" --> <div id="footer"> -<div id="left"> <span class="ucrlnum">LLNL-WEB-402631 |</span> <a href="https://www.llnl.gov/disclaimer.html" target="_blank" class="privacy">Privacy & Legal Notice</a></div> -<div id="right"><span class="ucrlnum">12 March 2008 </span></div> +<div id="left"> <span class="ucrlnum">LLNL-WEB-405518 |</span> <a href="https://www.llnl.gov/disclaimer.html" target="_blank" class="privacy">Privacy & Legal Notice</a></div> +<div id="right"><span class="ucrlnum">18 July 2008 </span></div> </div> <div id="footer2"> diff --git a/doc/html/gang_scheduling.shtml b/doc/html/gang_scheduling.shtml index 66c0b7cf6..e8d37467b 100644 --- a/doc/html/gang_scheduling.shtml +++ b/doc/html/gang_scheduling.shtml @@ -8,29 +8,30 @@ to jobs. Beginning in SLURM version 1.3, gang scheduling is supported. Gang scheduling is when two or more jobs are allocated to the same resources and these jobs are alternately suspended to let all of the tasks of each -job have full access to the shared resources for a period of time. +job have full access to the shared resources for a period of time. </P> -<P> +<P> A resource manager that supports timeslicing can improve it's responsiveness and utilization by allowing more jobs to begin running sooner. Shorter-running jobs no longer have to wait in a queue behind longer-running jobs. Instead they can be run "in parallel" with the longer-running jobs, which will allow them to finish quicker. 
Throughput is also improved because overcommitting the resources provides opportunities for "local backfilling" to occur (see example -below). +below). </P> -<P> +<P> The SLURM 1.3.0 the <I>sched/gang</I> plugin provides timeslicing. When enabled, -it monitors each of the partitions in SLURM. If a new job has been allocated to -resources in a partition that have already been allocated to an existing job, -then the plugin will suspend the new job until the configured -<I>SchedulerTimeslice</I> interval has elapsed. Then it will suspend the -running job and let the new job make use of the resources for a -<I>SchedulerTimeslice</I> interval. This will continue until one of the -jobs terminates. +it monitors each of the partitions in SLURM. If a new job has been allocated to +resources in a partition that have already been allocated to an existing job, +then the plugin will suspend the new job until the configured +<I>SchedulerTimeslice</I> interval has elapsed. Then it will suspend the +running job and let the new job make use of the resources for a +<I>SchedulerTimeslice</I> interval. This will continue until one of the +jobs terminates. </P> <H2>Configuration</H2> + <P> There are several important configuration parameters relating to gang scheduling: @@ -46,15 +47,20 @@ allocated by the <I>select/cons_res</I> plugin. with jobs, the resource selection plugin should be configured to track the amount of memory used by each job to ensure that memory page swapping does not occur. When <I>select/linear</I> is chosen, we recommend setting -<I>SelectTypeParameter=CR_Memory</I>. When <I>select/cons_res</I> is -chosen, we recommend including Memory as a resource (ex. +<I>SelectTypeParameter=CR_Memory</I>. When <I>select/cons_res</I> is +chosen, we recommend including Memory as a resource (ex. <I>SelectTypeParameter=CR_Core_Memory</I>). 
</LI> <LI> -<B>DefMemPerTask</B>: Since job requests may not explicitly specify -a memory requirement, we also recommend configuring <I>DefMemPerTask</I> -(default memory per task). It may also be desirable to configure -<I>MaxMemPerTask</I> (maximum memory per task) in <I>slurm.conf</I>. +<B>DefMemPerCPU</B>: Since job requests may not explicitly specify +a memory requirement, we also recommend configuring +<I>DefMemPerCPU</I> (default memory per allocated CPU) or +<I>DefMemPerNode</I> (default memory per allocated node). +It may also be desirable to configure +<I>MaxMemPerCPU</I> (maximum memory per allocated CPU) or +<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>. +Users can use the <I>--mem</I> or <I>--mem-per-cpu</I> option +at job submission time to specify their memory requirements. </LI> <LI> <B>JobAcctGatherType and JobAcctGatherFrequency</B>: @@ -63,9 +69,9 @@ using the <I>JobAcctGatherType</I> and <I>JobAcctGatherFrequency</I> parameters. If accounting is enabled and a job exceeds its configured memory limits, it will be canceled in order to prevent it from adversely effecting other jobs sharing the same resources. -</LI> +</LI> <LI> -<B>SchedulerType</B>: Configure the <I>sched/gang</I> plugin by setting +<B>SchedulerType</B>: Configure the <I>sched/gang</I> plugin by setting <I>SchedulerType=sched/gang</I> in <I>slurm.conf</I>. </LI> <LI> @@ -88,7 +94,7 @@ allocated to a common resource, set <I>Shared=FORCE:6</I>. In order to enable gang scheduling after making the configuration changes described above, restart SLURM if it is already running. Any change to the plugin settings in SLURM requires a full restart of the daemons. If you -just change the partition <I>Shared</I> setting, this can be updated with +just change the partition <I>Shared</I> setting, this can be updated with <I>scontrol reconfig</I>. 
</P> <P> @@ -96,377 +102,412 @@ For an advanced topic discussion on the potential use of swap space, see "Making use of swap space" in the "Future Work" section below. </P> -<H2>Timeslicer Design and Operation</H2> +<H2>Timeslicer Design and Operation</H2> <P> -When enabled, the <I>sched/gang</I> plugin keeps track of the resources -allocated to all jobs. For each partition an "active bitmap" is maintained that -tracks all concurrently running jobs in the SLURM cluster. Each time a new -job is allocated to resources in a partition, the <I>sched/gang</I> plugin -compares these newly allocated resources with the resources already maintained -in the "active bitmap". If these two sets of resources are disjoint then the new -job is added to the "active bitmap". If these two sets of resources overlap then -the new job is suspended. All jobs are tracked in a per-partition job queue +When enabled, the <I>sched/gang</I> plugin keeps track of the resources +allocated to all jobs. For each partition an "active bitmap" is maintained that +tracks all concurrently running jobs in the SLURM cluster. Each time a new +job is allocated to resources in a partition, the <I>sched/gang</I> plugin +compares these newly allocated resources with the resources already maintained +in the "active bitmap". If these two sets of resources are disjoint then the new +job is added to the "active bitmap". If these two sets of resources overlap then +the new job is suspended. All jobs are tracked in a per-partition job queue within the <I>sched/gang</I> plugin. </P> <P> -A separate <I>timeslicer thread</I> is spawned by the <I>sched/gang</I> plugin -on startup. This thread sleeps for the configured <I>SchedulerTimeSlice</I> -interval. When it wakes up, it checks each partition for suspended jobs. If -suspended jobs are found then the <I>timeslicer thread</I> moves all running -jobs to the end of the job queue. 
It then reconstructs the "active bitmap" for -this partition beginning with the suspended job that has waited the longest to -run (this will be the first suspended job in the run queue). Each following job -is then compared with the new "active bitmap", and if the job can be run -concurrently with the other "active" jobs then the job is added. Once this is -complete then the <I>timeslicer thread</I> suspends any currently running jobs -that are no longer part of the "active bitmap", and resumes jobs that are new to +A separate <I>timeslicer thread</I> is spawned by the <I>sched/gang</I> plugin +on startup. This thread sleeps for the configured <I>SchedulerTimeSlice</I> +interval. When it wakes up, it checks each partition for suspended jobs. If +suspended jobs are found then the <I>timeslicer thread</I> moves all running +jobs to the end of the job queue. It then reconstructs the "active bitmap" for +this partition beginning with the suspended job that has waited the longest to +run (this will be the first suspended job in the run queue). Each following job +is then compared with the new "active bitmap", and if the job can be run +concurrently with the other "active" jobs then the job is added. Once this is +complete then the <I>timeslicer thread</I> suspends any currently running jobs +that are no longer part of the "active bitmap", and resumes jobs that are new to the "active bitmap". </P> <P> -This <I>timeslicer thread</I> algorithm for rotating jobs is designed to prevent -jobs from starving (remaining in the suspended state indefinitly) and to be as -fair as possible in the distribution of runtime while still keeping all of the +This <I>timeslicer thread</I> algorithm for rotating jobs is designed to prevent +jobs from starving (remaining in the suspended state indefinitely) and to be as +fair as possible in the distribution of runtime while still keeping all of the resources as busy as possible. 
</P> <P> -The <I>sched/gang</I> plugin suspends jobs via the same internal functions that -support <I>scontrol suspend</I> and <I>scontrol resume</I>. A good way to -observe the operation of the timeslicer is by running <I>watch squeue</I> in a +The <I>sched/gang</I> plugin suspends jobs via the same internal functions that +support <I>scontrol suspend</I> and <I>scontrol resume</I>. A good way to +observe the operation of the timeslicer is by running <I>watch squeue</I> in a terminal window. </P> -<H2>A Simple Example</H2> +<H2>A Simple Example</H2> <P> -The following example is configured with <I>select/linear</I>, -<I>sched/gang</I>, and <I>Shared=FORCE</I>. This example takes place on a small +The following example is configured with <I>select/linear</I>, +<I>sched/gang</I>, and <I>Shared=FORCE</I>. This example takes place on a small cluster of 5 nodes: -</P> -<PRE> -[user@n16 load]$ <B>sinfo</B> -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -active* up infinite 5 idle n[12-16] +</P> +<PRE> +[user@n16 load]$ <B>sinfo</B> +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +active* up infinite 5 idle n[12-16] </PRE> <P> Here are the Scheduler settings (the last two settings are the relevant ones): </P> -<PRE> -[user@n16 load]$ <B>scontrol show config | grep Sched</B> -FastSchedule = 1 -SchedulerPort = 7321 -SchedulerRootFilter = 1 -SchedulerTimeSlice = 30 -SchedulerType = sched/gang -[user@n16 load]$ -</PRE> -<P> -The <I>myload</I> script launches a simple load-generating app that runs +<PRE> +[user@n16 load]$ <B>scontrol show config | grep Sched</B> +FastSchedule = 1 +SchedulerPort = 7321 +SchedulerRootFilter = 1 +SchedulerTimeSlice = 30 +SchedulerType = sched/gang +</PRE> +<P> +The <I>myload</I> script launches a simple load-generating app that runs for the given number of seconds. 
Submit <I>myload</I> to run on all nodes: </P> -<PRE> -[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> -sbatch: Submitted batch job 3 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 3 active myload user 0:05 5 n[12-16] +<PRE> +[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> +sbatch: Submitted batch job 3 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 3 active myload user 0:05 5 n[12-16] </PRE> <P> Submit it again and watch the <I>sched/gang</I> plugin suspend it: </P> -<PRE> -[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> -sbatch: Submitted batch job 4 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 3 active myload user R 0:13 5 n[12-16] - 4 active myload user S 0:00 5 n[12-16] +<PRE> +[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> +sbatch: Submitted batch job 4 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 3 active myload user R 0:13 5 n[12-16] + 4 active myload user S 0:00 5 n[12-16] </PRE> <P> -After 30 seconds the <I>sched/gang</I> plugin swaps jobs, and now job 4 is the +After 30 seconds the <I>sched/gang</I> plugin swaps jobs, and now job 4 is the active one: </P> -<PRE> -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 4 active myload user R 0:08 5 n[12-16] - 3 active myload user S 0:41 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 4 active myload user R 0:21 5 n[12-16] - 3 active myload user S 0:41 5 n[12-16] +<PRE> +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 4 active myload user R 0:08 5 n[12-16] + 3 active myload user S 0:41 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 4 active myload user R 0:21 5 n[12-16] + 3 active myload user S 0:41 5 n[12-16] +</PRE> +<P> +After another 30 seconds the <I>sched/gang</I> plugin sets job 3 running 
again: +</P> +<PRE> +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 3 active myload user R 0:50 5 n[12-16] + 4 active myload user S 0:30 5 n[12-16] +</PRE> + +<P> +<B>A possible side effect of timeslicing</B>: Note that jobs that are +immediately suspended may cause their srun commands to produce the following +output: +</P> +<PRE> +[user@n16 load]$ <B>cat slurm-4.out</B> +srun: Job step creation temporarily disabled, retrying +srun: Job step creation still disabled, retrying +srun: Job step creation still disabled, retrying +srun: Job step creation still disabled, retrying +srun: Job step created +</PRE> +<P> +This occurs because <I>srun</I> is attempting to launch a jobstep in an +allocation that has been suspended. The <I>srun</I> process will continue in a +retry loop to launch the jobstep until the allocation has been resumed and the +jobstep can be launched. +</P> +<P> +When the <I>sched/gang</I> plugin is enabled, this type of output in the user +jobs should be considered benign. +</P> + +<H2>More examples</H2> + +<P> +The following example shows how the timeslicer algorithm keeps the resources +busy. 
Job 10 runs continually, while jobs 9 and 11 are timesliced: +</P> + +<PRE> +[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> +sbatch: Submitted batch job 9 + +[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> +sbatch: Submitted batch job 10 + +[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> +sbatch: Submitted batch job 11 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 9 active myload user R 0:11 3 n[12-14] + 10 active myload user R 0:08 2 n[15-16] + 11 active myload user S 0:00 3 n[12-14] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 10 active myload user R 0:50 2 n[15-16] + 11 active myload user R 0:12 3 n[12-14] + 9 active myload user S 0:41 3 n[12-14] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 10 active myload user R 1:04 2 n[15-16] + 11 active myload user R 0:26 3 n[12-14] + 9 active myload user S 0:41 3 n[12-14] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 9 active myload user R 0:46 3 n[12-14] + 10 active myload user R 1:13 2 n[15-16] + 11 active myload user S 0:30 3 n[12-14] </PRE> -<P> After another 30 seconds the <I>sched/gang</I> plugin sets job 3 running again: -</P> -<PRE> -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 3 active myload user R 0:50 5 n[12-16] - 4 active myload user S 0:30 5 n[12-16] -</PRE> -<P> -<B>A possible side effect of timeslicing</B>: Note that jobs that are -immediately suspended may cause their srun commands to produce the following -output: -</P> -<PRE> -[user@n16 load]$ <B>cat slurm-4.out</B> -srun: Job step creation temporarily disabled, retrying -srun: Job step creation still disabled, retrying -srun: Job step creation still disabled, retrying -srun: Job step creation still disabled, retrying -srun: Job step created +</P> +<P> +The next example displays "local backfilling": +</P> +<PRE> +[user@n16 load]$ <B>sbatch -N3 ./myload 
300</B> +sbatch: Submitted batch job 12 + +[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> +sbatch: Submitted batch job 13 + +[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> +sbatch: Submitted batch job 14 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 12 active myload user R 0:14 3 n[12-14] + 14 active myload user R 0:06 2 n[15-16] + 13 active myload user S 0:00 5 n[12-16] </PRE> -<P> -This occurs because <I>srun</I> is attempting to launch a jobstep in an -allocation that has been suspended. The <I>srun</I> process will continue in a -retry loop to launch the jobstep until the allocation has been resumed and the -jobstep can be launched. -</P> -<P> -When the <I>sched/gang</I> plugin is enabled, this type of output in the user -jobs should be considered benign. -</P> - -<H2>More examples</H2> -<P> -The following example shows how the timeslicer algorithm keeps the resources -busy. Job 10 runs continually, while jobs 9 and 11 are timesliced: -</P> -<PRE> -[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> -sbatch: Submitted batch job 9 -[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> -sbatch: Submitted batch job 10 -[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> -sbatch: Submitted batch job 11 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 9 active myload user R 0:11 3 n[12-14] - 10 active myload user R 0:08 2 n[15-16] - 11 active myload user S 0:00 3 n[12-14] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 10 active myload user R 0:50 2 n[15-16] - 11 active myload user R 0:12 3 n[12-14] - 9 active myload user S 0:41 3 n[12-14] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 10 active myload user R 1:04 2 n[15-16] - 11 active myload user R 0:26 3 n[12-14] - 9 active myload user S 0:41 3 n[12-14] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 9 active myload user R 0:46 3 n[12-14] - 10 
active myload user R 1:13 2 n[15-16] - 11 active myload user S 0:30 3 n[12-14] -[user@n16 load]$ -</PRE> -</P> -<P> -The next example displays "local backfilling": -</P> -<PRE> -[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> -sbatch: Submitted batch job 12 -[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> -sbatch: Submitted batch job 13 -[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> -sbatch: Submitted batch job 14 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 12 active myload user R 0:14 3 n[12-14] - 14 active myload user R 0:06 2 n[15-16] - 13 active myload user S 0:00 5 n[12-16] -[user@n16 load]$ -</PRE> -<P> -Without timeslicing and without the backfill scheduler enabled, job 14 has to -wait for job 13 to finish. -</P><P> -This is called "local" backfilling because the backfilling only occurs with jobs -close enough in the queue to get allocated by the scheduler as part of -oversubscribing the resources. Recall that the number of jobs that can -overcommit a resource is controlled by the <I>Shared=FORCE:max_share</I> value, -so this value effectively controls the scope of "local backfilling". -</P><P> -Normal backfill algorithms check <U>all</U> jobs in the wait queue. -</P> - -<H2>Consumable Resource Examples</H2> -<P> -The following two examples illustrate the primary difference between -<I>CR_CPU</I> and <I>CR_Core</I> when consumable resource selection is enabled -(<I>select/cons_res</I>). -</P> -<P> -When <I>CR_CPU</I> (or <I>CR_CPU_Memory</I>) is configured then the selector -treats the CPUs as simple, <I>interchangeable</I> computing resources. However -when <I>CR_Core</I> (or <I>CR_Core_Memory</I>) is enabled the selector treats -the CPUs as individual resources that are <U>specifically</U> allocated to jobs. -This subtle difference is highlighted when timeslicing is enabled. -</P> -<P> -In both examples 6 jobs are submitted. Each job requests 2 CPUs per node, and -all of the nodes contain two quad-core processors. 
The timeslicer will initially -let the first 4 jobs run and suspend the last 2 jobs. The manner in which these -jobs are timesliced depends upon the configured <I>SelectTypeParameter</I>. -</P> -<P> -In the first example <I>CR_Core_Memory</I> is configured. Note that jobs 46 and -47 don't <U>ever</U> get suspended. This is because they are not sharing their -cores with any other job. Jobs 48 and 49 were allocated to the same cores as -jobs 45 and 46. The timeslicer recognizes this and timeslices only those jobs: -</P> -<PRE> -[user@n16 load]$ <B>sinfo</B> -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -active* up infinite 5 idle n[12-16] -[user@n16 load]$ <B>scontrol show config | grep Select</B> -SelectType = select/cons_res -SelectTypeParameters = CR_CORE_MEMORY -[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> -NODELIST NODES CPUS S:C:T -n[12-16] 5 8 2:4:1 -[user@n16 load]$ -[user@n16 load]$ -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 44 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 45 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 46 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 47 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 48 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 49 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 44 active myload user R 0:09 5 n[12-16] - 45 active myload user R 0:08 5 n[12-16] - 46 active myload user R 0:08 5 n[12-16] - 47 active myload user R 0:07 5 n[12-16] - 48 active myload user S 0:00 5 n[12-16] - 49 active myload user S 0:00 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 46 active myload user R 0:49 5 n[12-16] - 47 active myload user R 0:48 5 n[12-16] - 48 active myload user R 0:06 5 n[12-16] - 49 active myload user R 
0:06 5 n[12-16] - 44 active myload user S 0:44 5 n[12-16] - 45 active myload user S 0:43 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 44 active myload user R 1:23 5 n[12-16] - 45 active myload user R 1:22 5 n[12-16] - 46 active myload user R 2:22 5 n[12-16] - 47 active myload user R 2:21 5 n[12-16] - 48 active myload user S 1:00 5 n[12-16] - 49 active myload user S 1:00 5 n[12-16] -[user@n16 load]$ -</PRE> -<P> -Note the runtime of all 6 jobs in the output of the last <I>squeue</I> command. -Jobs 46 and 47 have been running continuously, while jobs 45 and 46 are -splitting their runtime with jobs 48 and 49. -</P><P> -The next example has <I>CR_CPU_Memory</I> configured and the same 6 jobs are -submitted. Here the selector and the timeslicer treat the CPUs as countable -resources which results in all 6 jobs sharing time on the CPUs: -</P> -<PRE> -[user@n16 load]$ <B>sinfo</B> -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -active* up infinite 5 idle n[12-16] -[user@n16 load]$ <B>scontrol show config | grep Select</B> -SelectType = select/cons_res -SelectTypeParameters = CR_CPU_MEMORY -[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> -NODELIST NODES CPUS S:C:T -n[12-16] 5 8 2:4:1 -[user@n16 load]$ -[user@n16 load]$ -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 51 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 52 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 53 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 54 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 55 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 56 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 51 active myload user R 0:11 5 n[12-16] - 52 active myload user R 0:11 5 n[12-16] - 53 active myload user R 0:10 
5 n[12-16] - 54 active myload user R 0:09 5 n[12-16] - 55 active myload user S 0:00 5 n[12-16] - 56 active myload user S 0:00 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 51 active myload user R 1:09 5 n[12-16] - 52 active myload user R 1:09 5 n[12-16] - 55 active myload user R 0:23 5 n[12-16] - 56 active myload user R 0:23 5 n[12-16] - 53 active myload user S 0:45 5 n[12-16] - 54 active myload user S 0:44 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 53 active myload user R 0:55 5 n[12-16] - 54 active myload user R 0:54 5 n[12-16] - 55 active myload user R 0:40 5 n[12-16] - 56 active myload user R 0:40 5 n[12-16] - 51 active myload user S 1:16 5 n[12-16] - 52 active myload user S 1:16 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 51 active myload user R 3:18 5 n[12-16] - 52 active myload user R 3:18 5 n[12-16] - 53 active myload user R 3:17 5 n[12-16] - 54 active myload user R 3:16 5 n[12-16] - 55 active myload user S 3:00 5 n[12-16] - 56 active myload user S 3:00 5 n[12-16] -[user@n16 load]$ -</PRE> -<P> -Note that the runtime of all 6 jobs is roughly equal. Jobs 51-54 ran first so -they're slightly ahead, but so far all jobs have run for at least 3 minutes. -</P><P> -At the core level this means that SLURM relies on the linux kernel to move jobs -around on the cores to maximize performance. This is different than when -<I>CR_Core_Memory</I> was configured and the jobs would effectively remain -"pinned" to their specific cores for the duration of the job. Note that -<I>CR_Core_Memory</I> supports CPU binding, while <I>CR_CPU_Memory</I> does not. +<P> +Without timeslicing and without the backfill scheduler enabled, job 14 has to +wait for job 13 to finish. 
+</P> +<P> +This is called "local" backfilling because the backfilling only occurs with jobs +close enough in the queue to get allocated by the scheduler as part of +oversubscribing the resources. Recall that the number of jobs that can +overcommit a resource is controlled by the <I>Shared=FORCE:max_share</I> value, +so this value effectively controls the scope of "local backfilling". +</P> +<P> +Normal backfill algorithms check <U>all</U> jobs in the wait queue. +</P> + +<H2>Consumable Resource Examples</H2> + +<P> +The following two examples illustrate the primary difference between +<I>CR_CPU</I> and <I>CR_Core</I> when consumable resource selection is enabled +(<I>select/cons_res</I>). +</P> +<P> +When <I>CR_CPU</I> (or <I>CR_CPU_Memory</I>) is configured then the selector +treats the CPUs as simple, <I>interchangeable</I> computing resources. However +when <I>CR_Core</I> (or <I>CR_Core_Memory</I>) is enabled the selector treats +the CPUs as individual resources that are <U>specifically</U> allocated to jobs. +This subtle difference is highlighted when timeslicing is enabled. +</P> +<P> +In both examples 6 jobs are submitted. Each job requests 2 CPUs per node, and +all of the nodes contain two quad-core processors. The timeslicer will initially +let the first 4 jobs run and suspend the last 2 jobs. The manner in which these +jobs are timesliced depends upon the configured <I>SelectTypeParameter</I>. +</P> +<P> +In the first example <I>CR_Core_Memory</I> is configured. Note that jobs 46 and +47 don't <U>ever</U> get suspended. This is because they are not sharing their +cores with any other job. Jobs 48 and 49 were allocated to the same cores as +jobs 44 and 45.
The timeslicer recognizes this and timeslices only those jobs: +</P> +<PRE> +[user@n16 load]$ <B>sinfo</B> +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +active* up infinite 5 idle n[12-16] + +[user@n16 load]$ <B>scontrol show config | grep Select</B> +SelectType = select/cons_res +SelectTypeParameters = CR_CORE_MEMORY + +[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> +NODELIST NODES CPUS S:C:T +n[12-16] 5 8 2:4:1 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 44 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 45 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 46 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 47 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 48 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 49 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 44 active myload user R 0:09 5 n[12-16] + 45 active myload user R 0:08 5 n[12-16] + 46 active myload user R 0:08 5 n[12-16] + 47 active myload user R 0:07 5 n[12-16] + 48 active myload user S 0:00 5 n[12-16] + 49 active myload user S 0:00 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 46 active myload user R 0:49 5 n[12-16] + 47 active myload user R 0:48 5 n[12-16] + 48 active myload user R 0:06 5 n[12-16] + 49 active myload user R 0:06 5 n[12-16] + 44 active myload user S 0:44 5 n[12-16] + 45 active myload user S 0:43 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 44 active myload user R 1:23 5 n[12-16] + 45 active myload user R 1:22 5 n[12-16] + 46 active myload user R 2:22 5 n[12-16] + 47 active myload user R 2:21 5 n[12-16] + 48 active myload user S 1:00 5 n[12-16] + 49 active myload user S 1:00 5 n[12-16] +</PRE> +<P> +Note the runtime of all 
6 jobs in the output of the last <I>squeue</I> command. +Jobs 46 and 47 have been running continuously, while jobs 44 and 45 are +splitting their runtime with jobs 48 and 49. +</P> +<P> +The next example has <I>CR_CPU_Memory</I> configured and the same 6 jobs are +submitted. Here the selector and the timeslicer treat the CPUs as countable +resources which results in all 6 jobs sharing time on the CPUs: +</P> +<PRE> +[user@n16 load]$ <B>sinfo</B> +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +active* up infinite 5 idle n[12-16] + +[user@n16 load]$ <B>scontrol show config | grep Select</B> +SelectType = select/cons_res +SelectTypeParameters = CR_CPU_MEMORY + +[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> +NODELIST NODES CPUS S:C:T +n[12-16] 5 8 2:4:1 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 51 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 52 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 53 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 54 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 55 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 56 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 51 active myload user R 0:11 5 n[12-16] + 52 active myload user R 0:11 5 n[12-16] + 53 active myload user R 0:10 5 n[12-16] + 54 active myload user R 0:09 5 n[12-16] + 55 active myload user S 0:00 5 n[12-16] + 56 active myload user S 0:00 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 51 active myload user R 1:09 5 n[12-16] + 52 active myload user R 1:09 5 n[12-16] + 55 active myload user R 0:23 5 n[12-16] + 56 active myload user R 0:23 5 n[12-16] + 53 active myload user S 0:45 5 n[12-16] + 54 active myload user S 0:44 5 n[12-16] + +[user@n16 load]$
<B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 53 active myload user R 0:55 5 n[12-16] + 54 active myload user R 0:54 5 n[12-16] + 55 active myload user R 0:40 5 n[12-16] + 56 active myload user R 0:40 5 n[12-16] + 51 active myload user S 1:16 5 n[12-16] + 52 active myload user S 1:16 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 51 active myload user R 3:18 5 n[12-16] + 52 active myload user R 3:18 5 n[12-16] + 53 active myload user R 3:17 5 n[12-16] + 54 active myload user R 3:16 5 n[12-16] + 55 active myload user S 3:00 5 n[12-16] + 56 active myload user S 3:00 5 n[12-16] +</PRE> +<P> +Note that the runtime of all 6 jobs is roughly equal. Jobs 51-54 ran first so +they're slightly ahead, but so far all jobs have run for at least 3 minutes. +</P> +<P> +At the core level this means that SLURM relies on the linux kernel to move jobs +around on the cores to maximize performance. This is different than when +<I>CR_Core_Memory</I> was configured and the jobs would effectively remain +"pinned" to their specific cores for the duration of the job. Note that +<I>CR_Core_Memory</I> supports CPU binding, while <I>CR_CPU_Memory</I> does not. </P> <H2>Future Work</H2> - -<P> -Priority scheduling and preemptive scheduling are other forms of gang -scheduling that are currently under development for SLURM. -</P> -<P> -<B>Making use of swap space</B>: (note that this topic is not currently -scheduled for development, unless someone would like to pursue this) It should -be noted that timeslicing does provide an interesting mechanism for high -performance jobs to make use of swap space. The optimal scenario is one in which -suspended jobs are "swapped out" and active jobs are "swapped in". The swapping -activity would only occur once every <I>SchedulerTimeslice</I> interval. 
-</P> -<P> -However, SLURM should first be modified to include support for scheduling jobs -into swap space and to provide controls to prevent overcommitting swap space. -For now this idea could be experimented with by disabling memory support in the -selector and submitting appropriately sized jobs. -</P> - -<p style="text-align:center;">Last modified 17 March 2008</p> + +<P> +Priority scheduling and preemptive scheduling are other forms of gang +scheduling that are currently under development for SLURM. +</P> +<P> +<B>Making use of swap space</B>: (note that this topic is not currently +scheduled for development, unless someone would like to pursue this) It should +be noted that timeslicing does provide an interesting mechanism for high +performance jobs to make use of swap space. The optimal scenario is one in which +suspended jobs are "swapped out" and active jobs are "swapped in". The swapping +activity would only occur once every <I>SchedulerTimeslice</I> interval. +</P> +<P> +However, SLURM should first be modified to include support for scheduling jobs +into swap space and to provide controls to prevent overcommitting swap space. +For now this idea could be experimented with by disabling memory support in the +selector and submitting appropriately sized jobs. 
+</P> + +<p style="text-align:center;">Last modified 7 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/header.txt b/doc/html/header.txt index 2f84c37a8..1adedfd3d 100644 --- a/doc/html/header.txt +++ b/doc/html/header.txt @@ -8,8 +8,8 @@ <meta http-equiv="Pragma" content="no-cache"> <meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, Linux clusters, high-performance computing, Livermore Computing"> -<meta name="LLNLRandR" content="LLNL-WEB-402631"> -<meta name="LLNLRandRdate" content="18 December 2006"> +<meta name="LLNLRandR" content="LLNL-WEB-405518"> +<meta name="LLNLRandRdate" content="18 July 2008"> <meta name="distribution" content="global"> <meta name="description" content="Simple Linux Utility for Resource Management"> <meta name="copyright" diff --git a/doc/html/moab.shtml b/doc/html/moab.shtml index 247b3f734..3ee5c7b15 100644 --- a/doc/html/moab.shtml +++ b/doc/html/moab.shtml @@ -198,18 +198,30 @@ that the user's environment on the execution host be loaded. Moab relies upon SLURM to perform this action, using the <i>--get-user-env</i> option for the salloc, sbatch and srun commands. The SLURM command then executes as user root a command of this sort -as user root: +as user root:</p> <pre> /bin/su - <user> -c \ "/bin/echo BEGIN; /bin/env; /bin/echo FINI" </pre> -While this command is executing within salloc, sbatch or srun, -the Moab daemon is completely non-responsive. +<p> For typical batch jobs, the job transfer from Moab to +SLURM is performed using <i>sbatch</i> and occurs instantaneously. +The environment is loaded by a SLURM daemon (slurmd) when the +batch job begins execution. +For interactive jobs (<i>msub -I ...</i>), the job transfer +from Moab to SLURM can not be completed until the environment +variables are loaded, during which time the Moab daemon is +completely non-responsive.
To insure that Moab remains operational, SLURM will abort the above -command within a few seconds and look for a cache file with the -user's environment and use that if found. +command within a configurable period of time and look for a cache +file with the user's environment and use that if found. Otherwise an error is reported to Moab. -We have provided a simple program that can be used to build +The time permitted for loading the current environment +before searching for a cache file is configurable using +the <i>GetEnvTimeout</i> parameter in SLURM's configuration +file, slurm.conf. A value of zero results in immediately +using the cache file. The default value is 2 seconds.</p> + +<p>We have provided a simple program that can be used to build cache files for users. The program can be found in the SLURM distribution at <i>contribs/env_cache_builder.c</i>. This program can support a longer timeout than Moab, but @@ -247,6 +259,6 @@ Write the output to a file with the same name as the user in the <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 23 April 2008</p> +<p style="text-align:center;">Last modified 10 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/preempt.shtml b/doc/html/preempt.shtml index d58acf003..2f9bd34df 100644 --- a/doc/html/preempt.shtml +++ b/doc/html/preempt.shtml @@ -44,10 +44,15 @@ chosen, we recommend setting <I>SelectTypeParameter=CR_Memory</I>. When (ex. <I>SelectTypeParameter=CR_Core_Memory</I>). </LI> <LI> -<B>DefMemPerTask</B>: Since job requests may not explicitly specify -a memory requirement, we also recommend configuring <I>DefMemPerTask</I> -(default memory per task). It may also be desirable to configure -<I>MaxMemPerTask</I> (maximum memory per task) in <I>slurm.conf</I>. 
+<B>DefMemPerCPU</B>: Since job requests may not explicitly specify +a memory requirement, we also recommend configuring +<I>DefMemPerCPU</I> (default memory per allocated CPU) or +<I>DefMemPerNode</I> (default memory per allocated node). +It may also be desirable to configure +<I>MaxMemPerCPU</I> (maximum memory per allocated CPU) or +<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>. +Users can use the <I>--mem</I> or <I>--mem-per-cpu</I> option +at job submission time to specify their memory requirements. </LI> <LI> <B>JobAcctGatherType and JobAcctGatherFrequency</B>: @@ -158,7 +163,6 @@ Here are the Partition settings: [user@n16 ~]$ <B>grep PartitionName /shared/slurm/slurm.conf</B> PartitionName=active Priority=1 Default=YES Shared=FORCE:1 Nodes=n[12-16] PartitionName=hipri Priority=2 Shared=FORCE:1 Nodes=n[12-16] -[user@n16 ~]$ </PRE> <P> The <I>runit.pl</I> script launches a simple load-generating app that runs @@ -183,7 +187,6 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST 487 active runit.pl user R 0:05 1 n14 488 active runit.pl user R 0:05 1 n15 489 active runit.pl user R 0:04 1 n16 -[user@n16 ~]$ </PRE> <P> Now submit a short-running 3-node job to the <I>hipri</I> partition: @@ -199,7 +202,6 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST 486 active runit.pl user S 0:27 1 n13 487 active runit.pl user S 0:26 1 n14 490 hipri runit.pl user R 0:03 3 n[12-14] -[user@n16 ~]$ </PRE> <P> Job 490 in the <I>hipri</I> partition preempted jobs 485, 486, and 487 from @@ -218,7 +220,6 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST 487 active runit.pl user R 0:29 1 n14 488 active runit.pl user R 0:59 1 n15 489 active runit.pl user R 0:58 1 n16 -[user@n16 ~]$ </PRE> @@ -242,6 +243,6 @@ again. This will be investigated at some point in the future. Requeuing a preempted job may make the most sense with <I>Shared=NO</I> partitions. 
</P> -<p style="text-align:center;">Last modified 11 April 2008</p> +<p style="text-align:center;">Last modified 7 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml index 7eac62a42..af47c6940 100644 --- a/doc/html/quickstart.shtml +++ b/doc/html/quickstart.shtml @@ -305,22 +305,31 @@ The <span class="commandline">salloc</span> command is would be used to create a resource allocation and typically start a shell within that allocation. One or more job steps would typically be executed within that allocation -using the srun command to launch the tasks. -Finally the shell created by salloc would be terminated using the -<i>exit</i> command. -In this example we will also use the <span class="commandline">sbcast</span> -command to transfer the executable program to local storage, /tmp/joe.a.out, -on the allocated nodes (1024 nodes in this example). +using the <span class="commandline">srun</span> command to launch the tasks +(depending upon the type of MPI being used, the launch mechanism may +differ, see <a href="#mpi">MPI</a> details below). +Finally the shell created by <span class="commandline">salloc</span> would +be terminated using the <i>exit</i> command. +SLURM does not automatically migrate executable or data files +to the nodes allocated to a job. +Either the files must exists on local disk or in some global file system +(e.g. NFS or Lustre). +We provide the tool <span class="commandline">sbcast</span> to transfer +files to local storage on allocated nodes using SLURM's hierarchical +communications. +In this example we use <span class="commandline">sbcast</span> to transfer +the executable program <i>a.out</i> to <i>/tmp/joe.a.out</i> on local storage +of the allocated nodes. 
After executing the program, we delete it from local storage</p> <pre> tux0: salloc -N1024 bash $ sbcast a.out /tmp/joe.a.out Granted job allocation 471 $ srun /tmp/joe.a.out -Result is 471 +Result is 3.14159 $ srun rm /tmp/joe.a.out $ exit -salloc: Relinquishing job allocation 1234 +salloc: Relinquishing job allocation 471 </pre> <p>In this example, we submit a batch job, get its status, and cancel it. </p> @@ -568,6 +577,6 @@ sbatch: Submitted batch job 1234 tasks. These tasks are not managed by SLURM since they are launched outside of its control.</p> -<p style="text-align:center;">Last modified 2 June 2008</p> +<p style="text-align:center;">Last modified 16 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index e8d06ea88..ecc4cfe72 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -60,6 +60,7 @@ man3_MANS = man3/slurm_hostlist_create.3 \ man3/slurm_kill_job.3 \ man3/slurm_kill_job_step.3 \ man3/slurm_load_ctl_conf.3 \ + man3/slurm_load_job.3 \ man3/slurm_load_jobs.3 \ man3/slurm_load_node.3 \ man3/slurm_load_partitions.3 \ diff --git a/doc/man/Makefile.in b/doc/man/Makefile.in index 109e32167..7aaf763cc 100644 --- a/doc/man/Makefile.in +++ b/doc/man/Makefile.in @@ -301,6 +301,7 @@ man3_MANS = man3/slurm_hostlist_create.3 \ man3/slurm_kill_job.3 \ man3/slurm_kill_job_step.3 \ man3/slurm_load_ctl_conf.3 \ + man3/slurm_load_job.3 \ man3/slurm_load_jobs.3 \ man3/slurm_load_node.3 \ man3/slurm_load_partitions.3 \ diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 5c29d74d5..01dad39b3 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -20,7 +20,7 @@ These parameters are \fIuser\fR, \fIcluster\fR, \fIpartition\fR, and the \fIClusterName\fR parameter in the \fIslurm.conf\fR configuration file. \fIpartition\fR is the name of a Slurm partition on that cluster. \fIaccount\fR is the bank account for a job. 
-The intended mode of operation is to initiate the \fBssacctmgr\fR command, +The intended mode of operation is to initiate the \fBsacctmgr\fR command, add, delete, modify, and/or list \fIassociation\fR records then commit the changes and exit. @@ -46,35 +46,35 @@ commit changes immediately. .TP \fB\-n\fR, \fB\-\-no_header\fR -Don't display header when listing results. +No header will be added to the beginning of the output. .TP \fB\-o\fR, \fB\-\-oneliner\fR -Print information one line per record. +Report output one record per line. This is equivalent to the \fBoneliner\fR command. .TP -\fB\-n\fR, \fB\-\-parsable\fR -Make output '|' delimited. +\fB\-p\fR, \fB\-\-parsable\fR +Output will be '|' delimited. .TP \fB\-q\fR, \fB\-\-quiet\fR -Print no warning or informational messages, only error messages. +Print no messages other than error messages. This is equivalent to the \fBquiet\fR command. .TP \fB\-s\fR, \fB\-\-associations\fR -Show an association for entities displayed. +Use with show or list to display associations with the entity. This is equivalent to the \fBassociations\fR command. .TP \fB\-v\fR, \fB\-\-verbose\fR -Print detailed event logging. +Enable detailed logging. This is equivalent to the \fBverbose\fR command. .TP \fB\-V\fR , \fB\-\-version\fR -Print version information and exit. +Display version number. This is equivalent to the \fBversion\fR command. .SH "COMMANDS" @@ -90,7 +90,7 @@ Identical to the \fBcreate\fR command. .TP \fBassociations\fR -Show associations for entities displayed. +Use with show or list to display associations with the entity. .TP \fBcreate\fR <\fIENTITY\fR> <\fISPECS\fR> @@ -103,7 +103,7 @@ Delete the specified entities. .TP \fBexit\fP -Terminate the execution of sacctmgr. +Terminate sacctmgr. Identical to the \fBquit\fR command. .TP @@ -122,15 +122,15 @@ Identical to the \fBshow\fR command. .TP \fBmodify\fR <\fIENTITY\fR> \fbwith\fR <\fISPECS\fR> \fbset\fR <\fISPECS\fR> -Modify an entities. +Modify an entity. 
.TP \fBoneliner\fP -Print information one line per record. +Output one record per line. .TP \fBquiet\fP -Print no warning or informational messages, only fatal error messages. +Print no messages other than error messages. .TP \fBquit\fP @@ -145,17 +145,17 @@ Identical to the \fBlist\fR command. .TP \fBverbose\fP -Print detailed event logging. +Enable detailed logging. This includes time\-stamps on data structures, record counts, etc. This is an independent command with no options meant for use in interactive mode. .TP \fBversion\fP -Display the version number of sacctmgr being executed. +Display the version number of sacctmgr. .TP \fB!!\fP -Repeat the last command executed. +Repeat the last command. .TP \fBENTITIES\fR @@ -179,6 +179,21 @@ The entity used to group information consisting of four parameters: The \fIClusterName\fR parameter in the \fIslurm.conf\fR configuration file, used to differentiate accounts from on different machines. +.TP +\fIcoordinator\fR +A special privileged user, usually an account manager or such that can +add users or sub accounts to the account they are coordinator over. +This should be a trusted person since they can change limits on +account and user associations inside their realm. + +.TP +\fIqos\fR +Quality of Service (For use with MOAB only). + +.TP +\fItransaction\fR +List of transactions that have occurred during a given time period. + .TP \fIuser\fR The login name. @@ -188,6 +203,7 @@ The login name. .TP \fICluster\fP=<cluster> Specific cluster to add account to. Default is all in system. +.TP \fIDescription\fP=<description> An arbitrary string describing an account. .TP @@ -279,6 +295,24 @@ Quality of Service jobs are to run at for this account. Now consisting of Normal, Standby, Expedite, and Exempt. This is overridden if set directly on an account or user. +.TP +\fBSPECIFICATIONS FOR COORDINATOR\fR +.TP +\fIAccounts\fP=<comma separated list of account names> +Account name to add this user as a coordinator to.
+.TP +\fINames\fP=<comma separated list of user names> +Names of coordinators. + +.TP +\fBSPECIFICATIONS FOR QOS\fR +.TP +\fIDescription\fP=<description> +An arbitrary string describing a QOS. +.TP +\fINames\fP=<qos> +Names of qos. + .TP \fBSPECIFICATIONS FOR USERS\fR .TP diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 72a8bcdb5..356f916d0 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -1,4 +1,4 @@ -.TH "salloc" "1" "SLURM 1.3" "May 2008" "SLURM Commands" +.TH "salloc" "1" "SLURM 1.3" "July 2008" "SLURM Commands" .SH "NAME" .LP salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished. @@ -306,12 +306,24 @@ The default value is the username of the submitting user. .TP \fB\-\-mem\fR[=]<\fIMB\fR> Specify the real memory required per node in MegaBytes. -If a value is specified, that quantity of memory will be -reserved for this job. -If no value is specified and real memory is exhausted on -any allocated node then the job is subject to cancellation. -Also see \fB\-\-task\-mem\fR. - +Default value is \fBDefMemPerNode\fR and the maximum value is +\fBMaxMemPerNode\fR. If configured, both parameters can be +seen using the \fBscontrol show config\fR command. +This parameter would generally be used if whole nodes +are allocated to jobs (\fBSelectType=select/linear\fR). +Also see \fB\-\-mem\-per\-cpu\fR. +\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. + +.TP +\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR> +Minimum memory required per allocated CPU in MegaBytes. +Default value is \fBDefMemPerCPU\fR and the maximum value is +\fBMaxMemPerCPU\fR. If configured, both parameters can be +seen using the \fBscontrol show config\fR command. +This parameter would generally be used if individual processors +are allocated to jobs (\fBSelectType=select/cons_res\fR). +Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. .TP \fB\-\-mincores\fR[=]<\fIn\fR> @@ -495,13 +507,6 @@ Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and "days\-hours:minutes:seconds". -.TP -\fB\-\-task\-mem\fR[=]<\fIMB\fR> -Mimimum memory available per task in MegaBytes. -Default value is \fBDefMemPerTask\fR and the maximum value is -\fBMaxMemPerTask\fR, both of which can be seen using the -\fBscontrol show config\fR command. - .TP \fB\-\-tmp\fR[=]<\fIMB\fR> Specify a minimum amount of temporary disk space. @@ -709,6 +714,7 @@ salloc \-N5 srun \-n10 myprogram .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394. .LP diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 03accb92b..918f78431 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -1,4 +1,4 @@ -.TH "sbatch" "1" "SLURM 1.3" "May 2008" "SLURM Commands" +.TH "sbatch" "1" "SLURM 1.3" "July 2008" "SLURM Commands" .SH "NAME" .LP sbatch \- Submit a batch script to SLURM. @@ -330,11 +330,24 @@ The default value is the username of the submitting user. .TP \fB\-\-mem\fR[=]<\fIMB\fR> Specify the real memory required per node in MegaBytes. -If a value is specified, that quantity of memory will be -reserved for this job. -If no value is specified and real memory is exhausted on -any allocated node then the job is subject to cancellation. -Also see \fB\-\-task\-mem\fR. +Default value is \fBDefMemPerNode\fR and the maximum value is +\fBMaxMemPerNode\fR. If configured, both parameters can be +seen using the \fBscontrol show config\fR command. +This parameter would generally be used if whole nodes +are allocated to jobs (\fBSelectType=select/linear\fR). +Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. + +.TP +\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR> +Minimum memory required per allocated CPU in MegaBytes. +Default value is \fBDefMemPerCPU\fR and the maximum value is +\fBMaxMemPerCPU\fR. If configured, both parameters can be +seen using the \fBscontrol show config\fR command. +This parameter would generally be used if individual processors +are allocated to jobs (\fBSelectType=select/cons_res\fR). +Also see \fB\-\-mem\fR. +\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. .TP \fB\-\-mincores\fR[=]<\fIn\fR> @@ -582,13 +595,6 @@ Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and "days\-hours:minutes:seconds". -.TP -\fB\-\-task\-mem\fR[=]<\fIMB\fR> -Mimimum memory available per task in MegaBytes. -Default value is \fBDefMemPerTask\fR and the maximum value is -\fBMaxMemPerTask\fR, both of which can be seen using the -\fBscontrol show config\fR command. - .TP \fB\-\-tasks\-per\-node\fR[=]<\fIn\fR> Specify the number of tasks to be launched per node. @@ -867,6 +873,7 @@ host4 .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394. .LP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index afe14ad59..5aca020e0 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,6 +1,4 @@ -.\" $Id: srun.1 14123 2008-05-23 20:22:46Z jette $ -.\" -.TH SRUN "1" "May 2008" "srun 1.3" "slurm components" +.TH SRUN "1" "July 2008" "srun 1.3" "slurm components" .SH "NAME" srun \- run parallel jobs @@ -425,11 +423,24 @@ The default value is the submitting user. .TP \fB\-\-mem\fR[=]<\fIMB\fR> Specify the real memory required per node in MegaBytes. -If a value is specified, that quantity of memory will be -reserved for this job.
-If no value is specified and real memory is exhausted on -any allocated node then the job is subject to cancellation. -Also see \fB\-\-task\-mem\fR. +Default value is \fBDefMemPerNode\fR and the maximum value is +\fBMaxMemPerNode\fR. If configured, both parameters can be +seen using the \fBscontrol show config\fR command. +This parameter would generally be used if whole nodes +are allocated to jobs (\fBSelectType=select/linear\fR). +Also see \fB\-\-mem\-per\-cpu\fR. +\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. + +.TP +\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR> +Minimum memory required per allocated CPU in MegaBytes. +Default value is \fBDefMemPerCPU\fR and the maximum value is +\fBMaxMemPerCPU\fR. If configured, both parameters can be +seen using the \fBscontrol show config\fR command. +This parameter would generally be used if individual processors +are allocated to jobs (\fBSelectType=select/cons_res\fR). +Also see \fB\-\-mem\fR. +\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive. .TP \fB\-\-mem_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR @@ -843,13 +854,6 @@ in slurm.conf is executed. This is meant to be a very short\-lived program. If it fails to terminate within a few seconds, it will be killed along with any descendant processes. -.TP -\fB\-\-task\-mem\fR[=]<\fIMB\fR> -Mimimum memory available per task in MegaBytes. -Default value is \fBDefMemPerTask\fR and the maximum value is -\fBMaxMemPerTask\fR, both of which can be seen using the -\fBscontrol show config\fR command. - .TP \fB\-\-task\-prolog\fR=\fIexecutable\fR The \fBslurmd\fR daemon will run \fIexecutable\fR just before launching @@ -1624,6 +1628,7 @@ wait .SH "COPYING" Copyright (C) 2006\-2007 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394.
.LP diff --git a/doc/man/man3/slurm_checkpoint_error.3 b/doc/man/man3/slurm_checkpoint_error.3 index 3a5fcf213..f79cb65ec 100644 --- a/doc/man/man3/slurm_checkpoint_error.3 +++ b/doc/man/man3/slurm_checkpoint_error.3 @@ -134,14 +134,14 @@ This can be issued as needed to prevent checkpointing while a job step is in a critical section or for other reasons. .LP \fBslurm_checkpoint_enable\fR -Make the indentified job step checkpointable. +Make the identified job step checkpointable. .LP \fBslurm_checkpoint_error\fR Get error information about the last checkpoint operation for a given job step. .LP \fBslurm_checkpoint_restart\fR Request that a previously checkpointed job resume execution. -It may continue execution on differrent nodes than were +It may continue execution on different nodes than were originally used. Execution may be delayed if resources are not immediately available. diff --git a/doc/man/man3/slurm_free_job_info_msg.3 b/doc/man/man3/slurm_free_job_info_msg.3 index 4471ae0cb..c31031800 100644 --- a/doc/man/man3/slurm_free_job_info_msg.3 +++ b/doc/man/man3/slurm_free_job_info_msg.3 @@ -25,6 +25,15 @@ void \fBslurm_free_job_info_msg\fR ( .br ); .LP +int \fBslurm_load_job\fR ( +.br + job_info_msg_t **\fIjob_info_msg_pptr\fP, +.br + uint32_t \fIjob_id\fP +.br +); +.LP +.LP int \fBslurm_load_jobs\fR ( .br time_t \fIupdate_time\fP, @@ -53,7 +62,7 @@ int \fBslurm_get_end_time\fR ( .LP long \fBslurm_get_rem_time\fR ( .br - uint32_t \fIjobid\fP + uint32_t \fIjob_id\fP .br ); .LP @@ -136,11 +145,13 @@ Specifies a pointer to a storage location into which a Slurm job id may be placed. .TP \fIjob_info_msg_ptr\fP -Specifies the pointer to the structure created by \fBslurm_load_jobs\fR. +Specifies the pointer to the structure created by \fBslurm_load_job\fR +or \fBslurm_load_jobs\fR. .TP \fIjobinfo\fP Job\-specific information as constructed by Slurm's NodeSelect plugin. -This data object is returned for each job by the \fBslurm_load_jobs\fR function. 
+This data object is returned for each job by the \fBslurm_load_job\fR or +\fBslurm_load_jobs\fR function. .TP \fIjob_pid\fP Specifies a process id of some process on the current node. @@ -183,6 +194,9 @@ expected termination time of a specified SLURM job id. The time corresponds to the exhaustion of the job\'s or partition\'s time limit. NOTE: The data is cached locally and only retrieved from the SLURM controller once per minute. .LP +\fBslurm_load_job\fR Returns a job_info_msg_t that contains an update time, +record count, and array of job_table records for some specific job ID. +.LP \fBslurm_load_jobs\fR Returns a job_info_msg_t that contains an update time, record count, and array of job_table records for all jobs. .LP @@ -331,6 +345,7 @@ expressions into a collection of individual node names. .SH "COPYING" Copyright (C) 2002\-2006 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394. .LP diff --git a/doc/man/man3/slurm_load_job.3 b/doc/man/man3/slurm_load_job.3 new file mode 100644 index 000000000..836ffa79b --- /dev/null +++ b/doc/man/man3/slurm_load_job.3 @@ -0,0 +1 @@ +.so man3/slurm_free_job_info_msg.3 diff --git a/doc/man/man3/slurm_step_ctx_create.3 b/doc/man/man3/slurm_step_ctx_create.3 index 76f6cff97..c949a221f 100644 --- a/doc/man/man3/slurm_step_ctx_create.3 +++ b/doc/man/man3/slurm_step_ctx_create.3 @@ -126,7 +126,7 @@ the second of type char **. \fBSLURM_STEP_CTX_CHDIR\fR Have the remote process change directory to the specified location before beginning execution. Accepts one argument of type -char * indentifying the directory's pathname. By default +char * identifying the directory's pathname. By default the remote process will execute in the same directory pathname from which it is spawned. NOTE: This assumes that same directory pathname exists on the other nodes. 
diff --git a/doc/man/man3/slurm_step_launch.3 b/doc/man/man3/slurm_step_launch.3 index cda4e827e..5434d9f81 100644 --- a/doc/man/man3/slurm_step_launch.3 +++ b/doc/man/man3/slurm_step_launch.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "December 2006" "Morris Jette" "Slurm job step launch functions" +.TH "Slurm API" "3" "July 2008" "Morris Jette" "Slurm job step launch functions" .SH "NAME" @@ -19,6 +19,8 @@ void \fBslurm_step_launch_params_t_init\fR ( int \fBslurm_step_launch\fR ( .br slurm_step_ctx \fIctx\fP, +.br + char * \fIlauncher_host\fP, .br const slurm_step_launch_params_t *\fIlaunch_req\fP, .br @@ -57,6 +59,12 @@ function calls, and destroyed by \fBslurm_step_ctx_destroy\fR. \fIlaunch_req\fP Pointer to a structure allocated by the user containing specifications of the job step to be launched. +.TP +\fIlauncher_host\fP +Host name or address to be used to identify the destination of PMI communications +for MPICH2. We intend to embed this information within \fIlaunch_req\fP in the +next major release of SLURM, when changes to the protocol can be more easily +addressed. .SH "DESCRIPTION" .LP @@ -74,6 +82,40 @@ default values. This function will NOT allocate any new memory. \fBslurm_step_launch_abort\fR Abort an in-progress launch, or terminate the fully launched job step. Can be called from a signal handler. +.SH "IO Redirection" +.LP +Use the \fIlocal_fds\fR entry in \fIslurm_step_launch_params_t\fR +to specify file descriptors to be used for standard input, output +and error. Any \fIlocal_fds\fR not specified will result in the launched +tasks using the calling process's standard input, output and error. +Threads created by \fBslurm_step_launch\fR will completely handle +copying data between the remote processes and the specified local file +descriptors. +.LP +Use the substructure in \fIslurm_step_io_fds_t\fR to restrict the +redirection of I/O to a specific node or task ID. 
For example, to +redirect standard output only from task 0, set +.LP +.nf +params.local_fds.out.taskid=0; +.fi +.LP +Use the \fIremote_*_filename\fR fields in \fIslurm_step_launch_params_t\fR +to have launched tasks read and/or write directly to local files +rather than transferring data over the network to the calling process. +These strings support many of the same format options as the \fBsrun\fR +command. Any \fIremote_*_filename\fR fields set will supersede the +corresponding \fIlocal_fds\fR entries. For example, the following +code will direct each task to write standard output and standard +error to local files with names containing the task ID (e.g. +"/home/bob/test_output/run1.out.0" and +"/home/bob/test_output/run1.err.0" for task 0). +.LP +.nf +params.remote_output_filename = "/home/bob/test_output/run1.out.%t" +params.remote_error_filename = "/home/bob/test_output/run1.err.%t" +.fi + .SH "RETURN VALUE" .LP \fBslurm_step_launch\fR and \fBslurm_step_launch_wait_start\fR @@ -154,7 +196,7 @@ int main (int argc, char *argv[]) params.argv = argv + 1; callbacks.task_start = _task_start; callbacks.task_finish = _task_finish; - if (slurm_step_launch(step_ctx, &params, &callbacks) + if (slurm_step_launch(step_ctx, NULL, &params, &callbacks) != SLURM_SUCCESS) { slurm_perror("slurm_step_launch"); exit(1); @@ -181,7 +223,8 @@ which must be linked to your process for use (e.g. "cc \-lslurm myprog.c"). .SH "COPYING" -Copyright (C) 2006 The Regents of the University of California. +Copyright (C) 2006-2007 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394. 
.LP diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 1d908839d..04f5b0b9a 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1,4 +1,4 @@ -.TH "slurm.conf" "5" "June 2008" "slurm.conf 1.3" "Slurm configuration file" +.TH "slurm.conf" "5" "July 2008" "slurm.conf 1.3" "Slurm configuration file" .SH "NAME" slurm.conf \- Slurm configuration file @@ -136,7 +136,7 @@ Name that \fBBackupController\fR should be referred to in establishing a communications path. This name will be used as an argument to the gethostbyname() function for identification. For example, "elx0000" might be used to designate -the ethernet address for node "lx0000". +the Ethernet address for node "lx0000". By default the \fBBackupAddr\fR will be identical in value to \fBBackupController\fR. @@ -182,7 +182,7 @@ Name that \fBControlMachine\fR should be referred to in establishing a communications path. This name will be used as an argument to the gethostbyname() function for identification. For example, "elx0000" might be used to designate -the ethernet address for node "lx0000". +the Ethernet address for node "lx0000". By default the \fBControlAddr\fR will be identical in value to \fBControlMachine\fR. @@ -208,11 +208,25 @@ License (GPL). The default value is "crypto/openssl". .TP -\fBDefMemPerTask\fR -Default real memory size available per task in MegaBytes. +\fBDefMemPerCPU\fR +Default real memory size available per allocated CPU in MegaBytes. Used to avoid over\-subscribing memory and causing paging. -Also see \fBMaxMemPerTask\fR. +\fBDefMemPerCPU\fR would generally be used if individual processors +are allocated to jobs (\fBSelectType=select/cons_res\fR). The default value is 0 (unlimited). +Also see \fBDefMemPerNode\fR and \fBMaxMemPerCPU\fR. +\fBDefMemPerCPU\fR and \fBDefMemPerNode\fR are mutually exclusive. + +.TP +\fBDefMemPerNode\fR +Default real memory size available per allocated node in MegaBytes. 
+Used to avoid over\-subscribing memory and causing paging. +\fBDefMemPerNode\fR would generally be used if whole nodes +are allocated to jobs (\fBSelectType=select/linear\fR) and +resources are shared (\fBShared=yes\fR or \fBShared=force\fR). +The default value is 0 (unlimited). +Also see \fBDefMemPerCPU\fR and \fBMaxMemPerNode\fR. +\fBDefMemPerCPU\fR and \fBDefMemPerNode\fR are mutually exclusive. .TP \fBDefaultStorageHost\fR @@ -431,9 +445,11 @@ Also see \fBDefaultStoragePort\fR. \fBJobCompType\fR Define the job completion logging mechanism type. Acceptable values at present include "jobcomp/none", "jobcomp/filetxt", -"jobcomp/mysql", "jobcomp/pgsql", "jobcomp/script"and "jobcomp/slurmdbd". +"jobcomp/mysql", "jobcomp/pgsql", and "jobcomp/script". The default value is "jobcomp/none", which means that upon job completion -the record of the job is purged from the system. +the record of the job is purged from the system. If using the accounting +infrastructure this plugin may not be of interest since the information +here is redundant. The value "jobcomp/filetxt" indicates that a record of the job should be written to a text file specified by the \fBJobCompLoc\fR parameter. The value "jobcomp/mysql" indicates that a record of the job should be @@ -443,10 +459,6 @@ written to a postgresql database specified by the \fBJobCompLoc\fR parameter. The value "jobcomp/script" indicates that a script specified by the \fBJobCompLoc\fR parameter is to be executed with environment variables indicating the job information. -The value "jobcomp/slurmdbd" indicates that job completion records -will be written to SlurmDbd, which maintains its own database. See -"man slurmdbd" for more information. -Also see \fBDefaultStorageType\fR. .TP \fBJobCompUser\fR @@ -525,11 +537,25 @@ of the slurmctld daemon. May not exceed 65533. .TP -\fBMaxMemPerTask\fR -Maximum real memory size available per task in MegaBytes. 
+\fBMaxMemPerCPU\fR +Maximum real memory size available per allocated CPU in MegaBytes. +Used to avoid over\-subscribing memory and causing paging. +\fBMaxMemPerCPU\fR would generally be used if individual processors +are allocated to jobs (\fBSelectType=select/cons_res\fR). +The default value is 0 (unlimited). +Also see \fBDefMemPerCPU\fR and \fBMaxMemPerNode\fR. +\fBMaxMemPerCPU\fR and \fBMaxMemPerNode\fR are mutually exclusive. + +.TP +\fBMaxMemPerNode\fR +Maximum real memory size available per allocated node in MegaBytes. Used to avoid over\-subscribing memory and causing paging. -Also see \fBDefMemPerTask\fR. +\fBMaxMemPerNode\fR would generally be used if whole nodes +are allocated to jobs (\fBSelectType=select/linear\fR) and +resources are shared (\fBShared=yes\fR or \fBShared=force\fR). The default value is 0 (unlimited). +Also see \fBDefMemPerNode\fR and \fBMaxMemPerCPU\fR. +\fBMaxMemPerCPU\fR and \fBMaxMemPerNode\fR are mutually exclusive. .TP \fBMessageTimeout\fR @@ -576,10 +602,21 @@ on SPANK plugins, see the \fBspank\fR(8) manual. .TP \fBPrivateData\fR -If non-zero then users are unable to view jobs or job steps belonging -to other users (except for SlurmUser or root, who can view all jobs). -The default value is "0", permitting any user to view any jobs or -job steps. +This controls what type of information is hidden from regular users. +By default, all information is visible to all users. +User \fBSlurmUser\fR and root can always view all information. +Multiple values may be specified with a comma separator. +Acceptable values include: +.RS +.TP +\fBjobs\fR prevents users from viewing jobs or job steps belonging +to other users. +.TP +\fBnodes\fR prevents users from viewing node state information. +.TP +\fBpartitions\fR prevents users from viewing partition state information. +.RE + .TP \fBProctrackType\fR @@ -835,22 +872,26 @@ On single\-core systems, each CPUs will be considered a CPU. .TP \fBCR_CPU_Memory\fR CPUs and memory are consumable resources. 
+Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_Core\fR Cores are consumable resources. .TP \fBCR_Core_Memory\fR Cores and memory are consumable resources. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_Socket\fR Sockets are consumable resources. .TP \fBCR_Socket_Memory\fR Memory and CPUs are consumable resources. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_Memory\fR Memory is a consumable resource. NOTE: This implies \fIShared=YES\fR or \fIShared=FORCE\fR for all partitions. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .RE .TP @@ -1405,7 +1446,16 @@ memory nodes if either will satisfy a job's requirements. The units of weight are arbitrary, but larger weights should be assigned to nodes with more processors, memory, disk space, higher processor speed, etc. -Weight is an integer value with a default value of 1. +Note that if a job allocation request can not be satisfied +using the nodes with the lowest weight, the set of nodes +with the next lowest weight is added to the set of nodes +under consideration for use (repeat as needed for higher +weight values). If you absolutely want to minimize the number +of higher weight nodes allocated to a job (at a cost of higher +scheduling overhead), give each node a distinct \fBWeight\fR +value and they will be added to the pool of nodes being +considered for scheduling individually. +The default value is 1. .LP The "DownNodes=" configuration permits you to mark certain nodes as in a DOWN, DRAIN, FAIL, or FAILING state without altering the permanent @@ -1575,7 +1625,7 @@ Possible values for \fBShared\fR are "EXCLUSIVE", "FORCE", "YES", and "NO". .RS .TP 12 \fBEXCLUSIVE\fR -Aallocates entire nodes to jobs even with select/cons_res configured. +Allocates entire nodes to jobs even with select/cons_res configured. This can be used to allocate whole nodes in some partitions and individual processors in other partitions. 
.TP @@ -1592,7 +1642,7 @@ with gang scheduling (\fBSchedulerType=sched/gang\fR). .TP \fBYES\fR Make nodes in the partition available for sharing, but provides -the user with a means of getting dediated resources. +the user with a means of getting dedicated resources. If \fBSelectType=select/cons_res\fR, then resources will be over\-subscribed unless explicitly disabled in the job submit request using the "\-\-exclusive" option. diff --git a/doc/man/man8/spank.8 b/doc/man/man8/spank.8 index 7813a6d64..2ee7e5886 100644 --- a/doc/man/man8/spank.8 +++ b/doc/man/man8/spank.8 @@ -1,4 +1,4 @@ -.TH "SPANK" "8" "May 2006" "SPANK" "SLURM plug\-in architecture for Node and job (K)control" +.TH "SPANK" "8" "Jul 2008" "SPANK" "SLURM plug\-in architecture for Node and job (K)control" .SH "NAME" \fBSPANK\fR \- SLURM Plug\-in Architecture for Node and job (K)control @@ -34,9 +34,17 @@ Plugins may query the context in which they are running with the launch. A plugin may define the following functions: .TP 2 \fBslurm_spank_init\fR -Called just after plugins are loaded. In remote context, this is -just after job step is initialized. For local context, this is before -user options are processed. +Called just after plugins are loaded. In remote context, this is just +after job step is initialized. This function is called before any plugin +option processing. +.TP +\fBslurm_spank_init_post_opt\fR +Called at the same point as \fBslurm_spank_init\fR, but after all +user options to the plugin have been processed. The reason that the +\fBinit\fR and \fBinit_post_opt\fR callbacks are separated is so that +plugins can process system-wide options specified in plugstack.conf in +the \fBinit\fR callback, then process user options, and finally take some +action in \fBslurm_spank_init_post_opt\fR if necessary. 
.TP \fBslurm_spank_local_user_init\fR Called in local (\fBsrun\fR or \fBsbatch\fR) context only after all diff --git a/slurm.spec b/slurm.spec index ba9bc65be..69ffeefa3 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,4 +1,4 @@ -# $Id: slurm.spec 14366 2008-06-26 20:04:12Z da $ +# $Id: slurm.spec 14616 2008-07-23 22:28:22Z jette $ # # Note that this package is not relocatable @@ -62,7 +62,7 @@ %slurm_with_opt aix %endif -# Build with sgijob, elan, and mysql plugins on CHAOS systems +# Build with sgijob, and mysql plugins on CHAOS systems %if %{?chaos}0 %slurm_with_opt mysql %slurm_with_opt sgijob @@ -71,14 +71,14 @@ %endif Name: slurm -Version: 1.3.5 -Release: 1%{?dist} +Version: 1.3.6 +Release: 1 Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.3.5.tar.bz2 +Source: slurm-1.3.6.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -249,7 +249,7 @@ SLURM process tracking plugin for SGI job containers. 
############################################################################# %prep -%setup -n slurm-1.3.5 +%setup -n slurm-1.3.6 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ @@ -439,7 +439,6 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/slurm/jobcomp_mysql.so %{_libdir}/slurm/jobcomp_pgsql.so %{_libdir}/slurm/jobcomp_script.so -%{_libdir}/slurm/jobcomp_slurmdbd.so %{_libdir}/slurm/proctrack_pgid.so %{_libdir}/slurm/proctrack_linuxproc.so %{_libdir}/slurm/sched_backfill.so diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 6f5573ed2..c48aa5d11 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -213,7 +213,7 @@ enum job_state_reason { /* Reasons for job to be pending */ WAIT_NO_REASON = 0, /* not set or job not pending */ WAIT_PRIORITY, /* higher priority jobs exist */ - WAIT_DEPENDENCY, /* depedent job has not completed */ + WAIT_DEPENDENCY, /* dependent job has not completed */ WAIT_RESOURCES, /* required resources not available */ WAIT_PART_NODE_LIMIT, /* request exceeds partition node limit */ WAIT_PART_TIME_LIMIT, /* request exceeds partition time limit */ @@ -448,8 +448,13 @@ typedef enum select_type_plugin_info { #define TASK_PARAM_CPUSETS 0x0001 #define TASK_PARAM_SCHED 0x0002 +#define MEM_PER_CPU 0x80000000 #define SHARED_FORCE 0x8000 +#define PRIVATE_DATA_JOBS 0x0001 /* job/step data is private */ +#define PRIVATE_DATA_NODES 0x0002 /* node data is private */ +#define PRIVATE_DATA_PARTITIONS 0x0004 /* partition data is private */ + /*****************************************************************************\ * PROTOCOL DATA STRUCTURE DEFINITIONS \*****************************************************************************/ @@ -528,7 +533,9 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint16_t job_min_sockets; /* minimum sockets per node, default=0 */ uint16_t job_min_cores; /* minimum cores per processor, default=0 */ uint16_t job_min_threads; /* minimum threads per core, default=0 */ - 
uint32_t job_min_memory; /* minimum real memory per node, default=0 */ + uint32_t job_min_memory; /* minimum real memory per node OR + * real memory per CPU | MEM_PER_CPU, + * default=0 (no limit) */ uint32_t job_min_tmp_disk; /* minimum tmp disk per node, default=0 */ uint32_t num_procs; /* total count of processors required, * default=0 */ @@ -990,7 +997,8 @@ typedef struct slurm_ctl_conf { bluegene clusters NULL otherwise */ char *plugindir; /* pathname to plugins */ char *plugstack; /* pathname to plugin stack config file */ - uint16_t private_data; /* block viewing of other user jobs */ + uint16_t private_data; /* block viewing of information, + * see PRIVATE_DATA_* */ char *proctrack_type; /* process tracking plugin type */ char *prolog; /* pathname of job prolog */ uint16_t propagate_prio_process; /* 1 if process priority should @@ -1539,6 +1547,15 @@ void slurm_print_slurmd_status PARAMS(( * SLURM JOB CONTROL CONFIGURATION READ/PRINT/UPDATE FUNCTIONS \*****************************************************************************/ +/* + * slurm_load_job - issue RPC to get job information for one job ID + * IN job_info_msg_pptr - place to store a job configuration pointer + * IN job_id - ID of job we want information about + * RET 0 or -1 on error + * NOTE: free the response using slurm_free_job_info_msg + */ +extern int slurm_load_job PARAMS((job_info_msg_t **resp, uint32_t job_id)); + /* * slurm_load_jobs - issue RPC to get slurm all job configuration * information if changed since update_time diff --git a/slurm/spank.h b/slurm/spank.h index 27f2a7f9c..5ef0a5eb1 100644 --- a/slurm/spank.h +++ b/slurm/spank.h @@ -1,7 +1,8 @@ /*****************************************************************************\ * spank.h - Stackable Plug-in Architecture for Node job Kontrol ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. 
+ * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * LLNL-CODE-402394. * @@ -61,6 +62,8 @@ typedef int (spank_f) (spank_t spank, int ac, char *argv[]); * * slurmd -> slurmstepd * `-> init () + * -> process spank options + * -> init_post_opt () * + drop privileges (initgroups(), seteuid(), chdir()) * `-> user_init () * + for each task @@ -82,6 +85,7 @@ typedef int (spank_f) (spank_t spank, int ac, char *argv[]); */ extern spank_f slurm_spank_init; +extern spank_f slurm_spank_init_post_opt; extern spank_f slurm_spank_local_user_init; extern spank_f slurm_spank_user_init; extern spank_f slurm_spank_task_init; @@ -127,7 +131,13 @@ enum spank_item { S_JOB_PID_TO_LOCAL_ID, /* local task id from pid (pid_t, uint32_t *) */ S_JOB_LOCAL_TO_GLOBAL_ID,/* local id to global id (uint32_t, uint32_t *) */ S_JOB_GLOBAL_TO_LOCAL_ID,/* global id to local id (uint32_t, uint32_t *) */ - S_JOB_SUPPLEMENTARY_GIDS /* Array of suppl. gids (gid_t **, int *) */ + S_JOB_SUPPLEMENTARY_GIDS,/* Array of suppl. gids (gid_t **, int *) */ + S_SLURM_VERSION, /* Current slurm version (char **) */ + S_SLURM_VERSION_MAJOR, /* Current slurm version major release (char **) */ + S_SLURM_VERSION_MINOR, /* Current slurm version minor release (char **) */ + S_SLURM_VERSION_MICRO, /* Current slurm version micro release (char **) */ + S_STEP_CPUS_PER_TASK /* CPUs allocated per task (=1 if --overcommit + * option is used, uint32_t *) */ }; typedef enum spank_item spank_item_t; @@ -216,8 +226,9 @@ int spank_remote (spank_t spank); /* Get the value for the current job or task item specified, * storing the result in the subsequent pointer argument(s). * Refer to the spank_item_t comments for argument types. - * For S_JOB_ARGV and S_JOB_ENV items the result returned to - * the caller should not be freed or modified. 
+ * For S_JOB_ARGV, S_JOB_ENV, and S_SLURM_VERSION* items + * the result returned to the caller should not be freed or + * modified. * * Returns ESPANK_SUCCESS on success, ESPANK_NOTASK if an S_TASK* * item is requested from outside a task context, ESPANK_BAD_ARG diff --git a/src/api/Makefile.am b/src/api/Makefile.am index b5634c6e7..0000a8549 100644 --- a/src/api/Makefile.am +++ b/src/api/Makefile.am @@ -53,9 +53,15 @@ lib_LTLIBRARIES = libslurm.la libpmi.la BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la # Note that libslurmhelper is mostly the same as libslurm, except that -# it exports ALL symbols, including those from libcommon, libeio, etc. -# Also, libslurmhelper is a convenience library, it is not installed. +# it exports ALL symbols used by the process, libcommon, libeio, etc. +# Only link with libslurmhelper if you are sure you are not going to be +# loading a plugin that could use something you yourself are not +# calling from here. +# libslurm.o only contains all the api symbols and will export +# them to plugins that are loaded. +# Also, libslurmhelper, libslurm.o are for convenience, they are not installed. 
noinst_LTLIBRARIES = libslurmhelper.la +noinst_PROGRAMS = libslurm.o slurmapi_src = \ allocate.c \ @@ -84,9 +90,9 @@ slurmapi_src = \ common_dir = $(top_builddir)/src/common -slurmapi_add = \ +slurmapi_add = \ $(common_dir)/libcommon.la \ - $(common_dir)/libspank.la \ + $(common_dir)/libspank.la \ $(common_dir)/libeio.la \ -lpthread @@ -96,7 +102,8 @@ libslurmhelper_la_LDFLAGs = \ $(LIB_LDFLAGS) \ -version-info $(current):$(rev):$(age) -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = \ + $(top_builddir)/src/api/libslurmhelper.la libslurm_la_SOURCES = libslurm_la_LIBADD = $(convenience_libs) @@ -115,8 +122,20 @@ libpmi_la_LIBADD = $(top_builddir)/src/api/libslurm.la libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \ $(PMI_OTHER_FLAGS) +# This was made so we chould export all symbols from libcommon +# on multiple platforms +libslurm_o_SOURCES = + +libslurm_o_LDADD = $(top_builddir)/src/common/libeio.o \ + $(top_builddir)/src/common/libspank.o \ + $(top_builddir)/src/common/libcommon.o + +libslurm.o: $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD) + $(LINK) $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD) + + force: -$(convenience_libs) : force +$(libslurm_o_LDADD) $(convenience_libs) : force @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/api/Makefile.in b/src/api/Makefile.in index 107f973d1..59d5d3dc0 100644 --- a/src/api/Makefile.in +++ b/src/api/Makefile.in @@ -17,6 +17,7 @@ # # Makefile for slurm API library + VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ @@ -36,6 +37,7 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ +noinst_PROGRAMS = libslurm.o$(EXEEXT) subdir = src/api DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -99,6 +101,12 @@ am__objects_1 = allocate.lo allocate_msg.lo cancel.lo checkpoint.lo \ triggers.lo reconfigure.lo update_config.lo am_libslurmhelper_la_OBJECTS = $(am__objects_1) 
libslurmhelper_la_OBJECTS = $(am_libslurmhelper_la_OBJECTS) +PROGRAMS = $(noinst_PROGRAMS) +am_libslurm_o_OBJECTS = +libslurm_o_OBJECTS = $(am_libslurm_o_OBJECTS) +libslurm_o_DEPENDENCIES = $(top_builddir)/src/common/libeio.o \ + $(top_builddir)/src/common/libspank.o \ + $(top_builddir)/src/common/libcommon.o DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -112,9 +120,9 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libpmi_la_SOURCES) $(libslurm_la_SOURCES) \ - $(libslurmhelper_la_SOURCES) + $(libslurmhelper_la_SOURCES) $(libslurm_o_SOURCES) DIST_SOURCES = $(libpmi_la_SOURCES) $(libslurm_la_SOURCES) \ - $(libslurmhelper_la_SOURCES) + $(libslurmhelper_la_SOURCES) $(libslurm_o_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -336,8 +344,13 @@ lib_LTLIBRARIES = libslurm.la libpmi.la BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la # Note that libslurmhelper is mostly the same as libslurm, except that -# it exports ALL symbols, including those from libcommon, libeio, etc. -# Also, libslurmhelper is a convenience library, it is not installed. +# it exports ALL symbols used by the process, libcommon, libeio, etc. +# Only link with libslurmhelper if you are sure you are not going to be +# loading a plugin that could use something you yourself are not +# calling from here. +# libslurm.o only contains all the api symbols and will export +# them to plugins that are loaded. +# Also, libslurmhelper, libslurm.o are for convenience, they are not installed. 
noinst_LTLIBRARIES = libslurmhelper.la slurmapi_src = \ allocate.c \ @@ -367,7 +380,7 @@ slurmapi_src = \ common_dir = $(top_builddir)/src/common slurmapi_add = \ $(common_dir)/libcommon.la \ - $(common_dir)/libspank.la \ + $(common_dir)/libspank.la \ $(common_dir)/libeio.la \ -lpthread @@ -377,7 +390,9 @@ libslurmhelper_la_LDFLAGs = \ $(LIB_LDFLAGS) \ -version-info $(current):$(rev):$(age) -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = \ + $(top_builddir)/src/api/libslurmhelper.la + libslurm_la_SOURCES = libslurm_la_LIBADD = $(convenience_libs) libslurm_la_LDFLAGS = \ @@ -396,6 +411,14 @@ libpmi_la_LIBADD = $(top_builddir)/src/api/libslurm.la libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \ $(PMI_OTHER_FLAGS) + +# This was made so we chould export all symbols from libcommon +# on multiple platforms +libslurm_o_SOURCES = +libslurm_o_LDADD = $(top_builddir)/src/common/libeio.o \ + $(top_builddir)/src/common/libspank.o \ + $(top_builddir)/src/common/libcommon.o + CLEANFILES = \ $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) @@ -479,6 +502,13 @@ libslurm.la: $(libslurm_la_OBJECTS) $(libslurm_la_DEPENDENCIES) libslurmhelper.la: $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_DEPENDENCIES) $(LINK) $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_LIBADD) $(LIBS) +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done + mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -613,7 +643,7 @@ distdir: $(DISTFILES) check-am: all-am check: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) check-am -all-am: Makefile $(LTLIBRARIES) +all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) installdirs: for dir in "$(DESTDIR)$(libdir)"; do \ test -z "$$dir" || $(MKDIR_P) "$$dir"; \ @@ -649,7 +679,7 @@ maintainer-clean-generic: clean: clean-am clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ - clean-noinstLTLIBRARIES mostlyclean-am + clean-noinstLTLIBRARIES 
clean-noinstPROGRAMS mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) @@ -709,21 +739,25 @@ uninstall-am: uninstall-libLTLIBRARIES .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool clean-noinstLTLIBRARIES \ - ctags distclean distclean-compile distclean-generic \ - distclean-libtool distclean-tags distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am install-info \ - install-info-am install-libLTLIBRARIES install-man install-pdf \ - install-pdf-am install-ps install-ps-am install-strip \ - installcheck installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-libLTLIBRARIES - + clean-noinstPROGRAMS ctags distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-libLTLIBRARIES \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-libLTLIBRARIES + + +libslurm.o: $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD) + $(LINK) $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD) force: -$(convenience_libs) : force +$(libslurm_o_LDADD) $(convenience_libs) : force @cd `dirname $@` && $(MAKE) `basename $@` # FIXME - don't export the client_io_handler_ symbols once srun is no longer diff --git a/src/api/allocate.c 
b/src/api/allocate.c index 96769c7c0..0fd089694 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * allocate.c - allocate nodes for a job or step with supplied contraints - * $Id: allocate.c 14453 2008-07-08 20:26:18Z da $ + * $Id: allocate.c 14571 2008-07-18 22:25:56Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -251,6 +251,8 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, /* yes, allocation has been granted */ errno = SLURM_PROTOCOL_SUCCESS; } else if (!req->immediate) { + if(resp->error_code != SLURM_SUCCESS) + info("%s", slurm_strerror(resp->error_code)); /* no, we need to wait for a response */ job_id = resp->job_id; slurm_free_resource_allocation_response_msg(resp); @@ -631,6 +633,9 @@ _handle_msg(slurm_msg_t *msg, resource_allocation_response_msg_t **resp) *resp = msg->data; rc = 1; break; + case SRUN_JOB_COMPLETE: + info("Job has been cancelled"); + break; default: error("received spurious message type: %d\n", msg->msg_type); diff --git a/src/api/config_info.c b/src/api/config_info.c index db17884c6..f4afe4578 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -61,6 +61,7 @@ extern long slurm_api_version (void) return (long) SLURM_API_VERSION; } + static char * _select_info(uint16_t select_type_param) { @@ -109,7 +110,7 @@ static char *_task_plugin_param(uint16_t task_plugin_param) void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr ) { - char time_str[32]; + char time_str[32], tmp_str[128]; if ( slurm_ctl_conf_ptr == NULL ) return ; @@ -153,11 +154,15 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_ptr->control_machine); fprintf(out, "CryptoType = %s\n", slurm_ctl_conf_ptr->crypto_type); - if 
(slurm_ctl_conf_ptr->def_mem_per_task) { - fprintf(out, "DefMemPerTask = %u\n", + if (slurm_ctl_conf_ptr->def_mem_per_task & MEM_PER_CPU) { + fprintf(out, "DefMemPerCPU = %u\n", + slurm_ctl_conf_ptr->def_mem_per_task & + (~MEM_PER_CPU)); + } else if (slurm_ctl_conf_ptr->def_mem_per_task) { + fprintf(out, "DefMemPerNode = %u\n", slurm_ctl_conf_ptr->def_mem_per_task); } else - fprintf(out, "DefMemPerTask = UNLIMITED\n"); + fprintf(out, "DefMemPerCPU = UNLIMITED\n"); if (slurm_ctl_conf_ptr->disable_root_jobs) fprintf(out, "DisableRootJobs = YES\n"); else @@ -220,11 +225,15 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_ptr->mail_prog); fprintf(out, "MaxJobCount = %u\n", slurm_ctl_conf_ptr->max_job_cnt); - if (slurm_ctl_conf_ptr->max_mem_per_task) { - fprintf(out, "MaxMemPerTask = %u\n", + if (slurm_ctl_conf_ptr->max_mem_per_task & MEM_PER_CPU) { + fprintf(out, "MaxMemPerCPU = %u\n", + slurm_ctl_conf_ptr->max_mem_per_task & + (~MEM_PER_CPU)); + } else if (slurm_ctl_conf_ptr->max_mem_per_task) { + fprintf(out, "MaxMemPerNode = %u\n", slurm_ctl_conf_ptr->max_mem_per_task); } else - fprintf(out, "MaxMemPerTask = UNLIMITED\n"); + fprintf(out, "MaxMemPerCPU = UNLIMITED\n"); fprintf(out, "MessageTimeout = %u\n", slurm_ctl_conf_ptr->msg_timeout); fprintf(out, "MinJobAge = %u\n", @@ -240,8 +249,9 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_ptr->plugindir); fprintf(out, "PlugStackConfig = %s\n", slurm_ctl_conf_ptr->plugstack); - fprintf(out, "PrivateData = %u\n", - slurm_ctl_conf_ptr->private_data); + private_data_string(slurm_ctl_conf_ptr->private_data, + tmp_str, sizeof(tmp_str)); + fprintf(out, "PrivateData = %s\n", tmp_str); fprintf(out, "ProctrackType = %s\n", slurm_ctl_conf_ptr->proctrack_type); fprintf(out, "Prolog = %s\n", diff --git a/src/api/init_msg.c b/src/api/init_msg.c index ec1e1f797..333752b31 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -1,8 +1,8 @@ 
/*****************************************************************************\ * init_msg.c - initialize RPC messages contents - * $Id: init_msg.c 13672 2008-03-19 23:10:58Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>. * LLNL-CODE-402394. @@ -55,71 +55,25 @@ */ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) { - job_desc_msg->account = NULL; - job_desc_msg->acctg_freq = (uint16_t) NO_VAL; - job_desc_msg->alloc_node = NULL; - job_desc_msg->alloc_sid = NO_VAL; - job_desc_msg->comment = NULL; - job_desc_msg->contiguous = (uint16_t) NO_VAL; - job_desc_msg->cpus_per_task = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; - job_desc_msg->dependency = NULL; - job_desc_msg->environment = ((char **) NULL); - job_desc_msg->env_size = 0; - job_desc_msg->features = NULL; - job_desc_msg->immediate = 0; - job_desc_msg->job_id = NO_VAL; - job_desc_msg->job_min_cores = (uint16_t) NO_VAL; - job_desc_msg->job_min_procs = (uint16_t) NO_VAL; - job_desc_msg->job_min_sockets = (uint16_t) NO_VAL; - job_desc_msg->job_min_threads = (uint16_t) NO_VAL; - job_desc_msg->job_min_memory = NO_VAL; - job_desc_msg->job_min_tmp_disk= NO_VAL; - job_desc_msg->kill_on_node_fail = (uint16_t) NO_VAL; - job_desc_msg->licenses = NULL; - job_desc_msg->name = NULL; - job_desc_msg->network = NULL; - job_desc_msg->nice = NICE_OFFSET; - job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; - 
job_desc_msg->num_tasks = NO_VAL; - job_desc_msg->open_mode = 0; /* system default */ - job_desc_msg->overcommit = (uint8_t) NO_VAL; - job_desc_msg->partition = NULL; - job_desc_msg->plane_size = (uint16_t) NO_VAL; - job_desc_msg->priority = NO_VAL; - job_desc_msg->req_nodes = NULL; - job_desc_msg->exc_nodes = NULL; - job_desc_msg->script = NULL; - job_desc_msg->argv = ((char **) NULL); - job_desc_msg->argc = 0; - job_desc_msg->shared = (uint16_t) NO_VAL; - job_desc_msg->task_dist = (uint16_t) NO_VAL; - job_desc_msg->time_limit = NO_VAL; - job_desc_msg->num_procs = NO_VAL; - job_desc_msg->max_nodes = NO_VAL; - job_desc_msg->min_nodes = NO_VAL; - job_desc_msg->max_sockets = (uint16_t) NO_VAL; - job_desc_msg->min_sockets = (uint16_t) NO_VAL; - job_desc_msg->max_cores = (uint16_t) NO_VAL; - job_desc_msg->min_cores = (uint16_t) NO_VAL; - job_desc_msg->max_threads = (uint16_t) NO_VAL; - job_desc_msg->min_threads = (uint16_t) NO_VAL; - job_desc_msg->err = NULL; - job_desc_msg->in = NULL; - job_desc_msg->out = NULL; - job_desc_msg->user_id = NO_VAL; - job_desc_msg->group_id = NO_VAL; - job_desc_msg->work_dir = NULL; - job_desc_msg->alloc_resp_port = 0; - job_desc_msg->other_port = 0; - job_desc_msg->mail_type = 0; - job_desc_msg->mail_user = NULL; - job_desc_msg->begin_time = 0; - job_desc_msg->requeue = (uint16_t) NO_VAL; + job_desc_msg->account = NULL; + job_desc_msg->acctg_freq = (uint16_t) NO_VAL; + job_desc_msg->alloc_node = NULL; + job_desc_msg->alloc_resp_port = 0; + job_desc_msg->alloc_sid = NO_VAL; + job_desc_msg->argc = 0; + job_desc_msg->argv = ((char **) NULL); + job_desc_msg->begin_time = 0; + job_desc_msg->blrtsimage = NULL; + job_desc_msg->comment = NULL; + job_desc_msg->conn_type = (uint16_t) NO_VAL; + job_desc_msg->contiguous = (uint16_t) NO_VAL; + job_desc_msg->cpus_per_task = (uint16_t) NO_VAL; + job_desc_msg->dependency = NULL; + job_desc_msg->environment = ((char **) NULL); + job_desc_msg->env_size = 0; + job_desc_msg->err = NULL; + 
job_desc_msg->exc_nodes = NULL; + job_desc_msg->features = NULL; #if SYSTEM_DIMENSIONS { int i; @@ -127,14 +81,58 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) job_desc_msg->geometry[i] = (uint16_t) NO_VAL; } #endif - job_desc_msg->conn_type = (uint16_t) NO_VAL; - job_desc_msg->reboot = (uint16_t) NO_VAL; - job_desc_msg->rotate = (uint16_t) NO_VAL; - job_desc_msg->blrtsimage = NULL; - job_desc_msg->linuximage = NULL; - job_desc_msg->mloaderimage = NULL; - job_desc_msg->ramdiskimage = NULL; - job_desc_msg->select_jobinfo = NULL; + job_desc_msg->group_id = NO_VAL; + job_desc_msg->immediate = 0; + job_desc_msg->in = NULL; + job_desc_msg->job_id = NO_VAL; + job_desc_msg->job_min_cores = (uint16_t) NO_VAL; + job_desc_msg->job_min_procs = (uint16_t) NO_VAL; + job_desc_msg->job_min_sockets = (uint16_t) NO_VAL; + job_desc_msg->job_min_threads = (uint16_t) NO_VAL; + job_desc_msg->job_min_memory = NO_VAL; + job_desc_msg->job_min_tmp_disk = NO_VAL; + job_desc_msg->kill_on_node_fail = (uint16_t) NO_VAL; + job_desc_msg->licenses = NULL; + job_desc_msg->linuximage = NULL; + job_desc_msg->mail_type = 0; + job_desc_msg->mail_user = NULL; + job_desc_msg->max_cores = (uint16_t) NO_VAL; + job_desc_msg->max_nodes = NO_VAL; + job_desc_msg->max_sockets = (uint16_t) NO_VAL; + job_desc_msg->max_threads = (uint16_t) NO_VAL; + job_desc_msg->min_cores = (uint16_t) NO_VAL; + job_desc_msg->min_nodes = NO_VAL; + job_desc_msg->min_sockets = (uint16_t) NO_VAL; + job_desc_msg->min_threads = (uint16_t) NO_VAL; + job_desc_msg->mloaderimage = NULL; + job_desc_msg->name = NULL; + job_desc_msg->network = NULL; + job_desc_msg->nice = NICE_OFFSET; + job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; + job_desc_msg->num_procs = NO_VAL; + job_desc_msg->num_tasks = NO_VAL; + job_desc_msg->open_mode = 0; /* system default */ + job_desc_msg->other_port = 0; + job_desc_msg->out = NULL; + 
job_desc_msg->overcommit = (uint8_t) NO_VAL; + job_desc_msg->partition = NULL; + job_desc_msg->plane_size = (uint16_t) NO_VAL; + job_desc_msg->priority = NO_VAL; + job_desc_msg->ramdiskimage = NULL; + job_desc_msg->reboot = (uint16_t) NO_VAL; + job_desc_msg->resp_host = NULL; + job_desc_msg->req_nodes = NULL; + job_desc_msg->requeue = (uint16_t) NO_VAL; + job_desc_msg->rotate = (uint16_t) NO_VAL; + job_desc_msg->script = NULL; + job_desc_msg->select_jobinfo = NULL; + job_desc_msg->shared = (uint16_t) NO_VAL; + job_desc_msg->task_dist = (uint16_t) NO_VAL; + job_desc_msg->time_limit = NO_VAL; + job_desc_msg->user_id = NO_VAL; + job_desc_msg->work_dir = NULL; } /* diff --git a/src/api/job_info.c b/src/api/job_info.c index dc9c696eb..f44ed7db9 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -1,8 +1,8 @@ /*****************************************************************************\ * job_info.c - get/print the job state information of slurm - * $Id: job_info.c 14298 2008-06-20 16:45:30Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> et. al. * LLNL-CODE-402394. 
@@ -371,13 +371,18 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) xstrcat(out, "\n "); /****** Line 10 ******/ + if (job_ptr->job_min_memory & MEM_PER_CPU) { + job_ptr->job_min_memory &= (~MEM_PER_CPU); + tmp3_ptr = "CPU"; + } else + tmp3_ptr = "Node"; convert_num_unit((float)job_ptr->job_min_memory, tmp1, sizeof(tmp1), UNIT_NONE); convert_num_unit((float)job_ptr->job_min_tmp_disk, tmp2, sizeof(tmp2), UNIT_NONE); snprintf(tmp_line, sizeof(tmp_line), - "MinMemory=%s MinTmpDisk=%s Features=%s", - tmp1, tmp2, job_ptr->features); + "MinMemory%s=%s MinTmpDisk=%s Features=%s", + tmp3_ptr, tmp1, tmp2, job_ptr->features); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); @@ -551,8 +556,9 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) return out; } + /* - * slurm_load_jobs - issue RPC to get slurm all job configuration + * slurm_load_jobs - issue RPC to get all job configuration * information if changed since update_time * IN update_time - time of current configuration data * IN job_info_msg_pptr - place to store a job configuration pointer @@ -598,6 +604,48 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp, return SLURM_PROTOCOL_SUCCESS ; } +/* + * slurm_load_job - issue RPC to get job information for one job ID + * IN job_info_msg_pptr - place to store a job configuration pointer + * IN job_id - ID of job we want information about + * RET 0 or -1 on error + * NOTE: free the response using slurm_free_job_info_msg + */ +extern int +slurm_load_job (job_info_msg_t **resp, uint32_t job_id) +{ + int rc; + slurm_msg_t resp_msg; + slurm_msg_t req_msg; + job_id_msg_t req; + + slurm_msg_t_init(&req_msg); + slurm_msg_t_init(&resp_msg); + + req.job_id = job_id; + req_msg.msg_type = REQUEST_JOB_INFO_SINGLE; + req_msg.data = &req; + + if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) + return SLURM_ERROR; + + switch (resp_msg.msg_type) { + case RESPONSE_JOB_INFO: + *resp = (job_info_msg_t *)resp_msg.data; + break; + 
case RESPONSE_SLURM_RC: + rc = ((return_code_msg_t *) resp_msg.data)->return_code; + slurm_free_return_code_msg(resp_msg.data); + if (rc) + slurm_seterrno_ret(rc); + break; + default: + slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR); + break; + } + + return SLURM_PROTOCOL_SUCCESS ; +} /* * slurm_pid2jobid - issue RPC to get the slurm job_id given a process_id diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c index 3b3bd1d1c..9cc2ddc8b 100644 --- a/src/api/step_ctx.c +++ b/src/api/step_ctx.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * step_ctx.c - step_ctx task functions for use by AIX/POE * - * $Id: step_ctx.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: step_ctx.c 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -58,12 +58,14 @@ static void _job_fake_cred(struct slurm_step_ctx_struct *ctx) { slurm_cred_arg_t arg; - arg.jobid = ctx->job_id; - arg.stepid = ctx->step_resp->job_step_id; - arg.uid = ctx->user_id; - arg.hostlist = ctx->step_req->node_list; - arg.alloc_lps_cnt = 0; - arg.alloc_lps = NULL; + arg.alloc_lps_cnt = 0; + arg.alloc_lps = NULL; + arg.hostlist = ctx->step_req->node_list; + arg.job_mem = 0; + arg.jobid = ctx->job_id; + arg.stepid = ctx->step_resp->job_step_id; + arg.task_mem = 0; + arg.uid = ctx->user_id; ctx->step_resp->cred = slurm_cred_faker(&arg); } diff --git a/src/api/step_launch.c b/src/api/step_launch.c index 61c100f4b..981fb2da6 100644 --- a/src/api/step_launch.c +++ b/src/api/step_launch.c @@ -410,7 +410,8 @@ void slurm_step_launch_wait_finish(slurm_step_ctx_t *ctx) } if (!force_terminated_job && task_exit_signal) - info("Force Terminated job step"); + info("Force Terminated job step %u.%u", + ctx->job_id, ctx->step_resp->job_step_id); /* Then shutdown the message handler thread */ 
eio_signal_shutdown(sls->msg_handle); diff --git a/src/common/Makefile.am b/src/common/Makefile.am index b1a86ac22..f6127ebe2 100644 --- a/src/common/Makefile.am +++ b/src/common/Makefile.am @@ -20,7 +20,7 @@ endif INCLUDES = -I$(top_srcdir) -noinst_PROGRAMS = libcommon.o +noinst_PROGRAMS = libcommon.o libeio.o libspank.o noinst_LTLIBRARIES = \ libcommon.la \ @@ -118,14 +118,24 @@ libcommon_la_LIBADD = -ldl libcommon_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic -libcommon_o_SOURCES = - - # This was made so we chould export all symbols from libcommon # on multiple platforms +libcommon_o_SOURCES = libcommon.o : $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES) $(libcommon_la_LINK) $(libcommon_la_OBJECTS) +# This was made so we chould export all symbols from libeio +# on multiple platforms +libeio_o_SOURCES = +libeio.o : $(libeio_la_OBJECTS) $(libeio_la_DEPENDENCIES) + $(LINK) $(libeio_la_OBJECTS) + +# This was made so we chould export all symbols from libspank +# on multiple platforms +libspank_o_SOURCES = +libspank.o : $(libspank_la_OBJECTS) $(libspank_la_DEPENDENCIES) + $(LINK) $(libspank_la_OBJECTS) + global_defaults.c : $(top_builddir)/config.h Makefile @( echo "/* This file autogenerated by src/common/Makefile */"; \ diff --git a/src/common/Makefile.in b/src/common/Makefile.in index 10ada4422..e8dddce1a 100644 --- a/src/common/Makefile.in +++ b/src/common/Makefile.in @@ -44,7 +44,8 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ -noinst_PROGRAMS = libcommon.o$(EXEEXT) +noinst_PROGRAMS = libcommon.o$(EXEEXT) libeio.o$(EXEEXT) \ + libspank.o$(EXEEXT) subdir = src/common DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 @@ -142,6 +143,12 @@ PROGRAMS = $(noinst_PROGRAMS) am_libcommon_o_OBJECTS = libcommon_o_OBJECTS = $(am_libcommon_o_OBJECTS) libcommon_o_LDADD = $(LDADD) +am_libeio_o_OBJECTS = +libeio_o_OBJECTS = $(am_libeio_o_OBJECTS) +libeio_o_LDADD = $(LDADD) 
+am_libspank_o_OBJECTS = +libspank_o_OBJECTS = $(am_libspank_o_OBJECTS) +libspank_o_LDADD = $(LDADD) DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp am__depfiles_maybe = depfiles @@ -156,11 +163,13 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ $(LDFLAGS) -o $@ SOURCES = $(libcommon_la_SOURCES) $(EXTRA_libcommon_la_SOURCES) \ $(libdaemonize_la_SOURCES) $(libeio_la_SOURCES) \ - $(libspank_la_SOURCES) $(libcommon_o_SOURCES) + $(libspank_la_SOURCES) $(libcommon_o_SOURCES) \ + $(libeio_o_SOURCES) $(libspank_o_SOURCES) DIST_SOURCES = $(am__libcommon_la_SOURCES_DIST) \ $(am__EXTRA_libcommon_la_SOURCES_DIST) \ $(libdaemonize_la_SOURCES) $(libeio_la_SOURCES) \ - $(libspank_la_SOURCES) $(libcommon_o_SOURCES) + $(libspank_la_SOURCES) $(libcommon_o_SOURCES) \ + $(libeio_o_SOURCES) $(libspank_o_SOURCES) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -431,7 +440,18 @@ libspank_la_SOURCES = \ libcommon_la_LIBADD = -ldl libcommon_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic + +# This was made so we chould export all symbols from libcommon +# on multiple platforms libcommon_o_SOURCES = + +# This was made so we chould export all symbols from libeio +# on multiple platforms +libeio_o_SOURCES = + +# This was made so we chould export all symbols from libspank +# on multiple platforms +libspank_o_SOURCES = all: all-am .SUFFIXES: @@ -762,11 +782,12 @@ uninstall-am: mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am - -# This was made so we chould export all symbols from libcommon -# on multiple platforms libcommon.o : $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES) $(libcommon_la_LINK) $(libcommon_la_OBJECTS) +libeio.o : $(libeio_la_OBJECTS) $(libeio_la_DEPENDENCIES) + $(LINK) $(libeio_la_OBJECTS) +libspank.o : $(libspank_la_OBJECTS) $(libspank_la_DEPENDENCIES) + $(LINK) 
$(libspank_la_OBJECTS) global_defaults.c : $(top_builddir)/config.h Makefile @( echo "/* This file autogenerated by src/common/Makefile */"; \ diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 21a5b2d2a..39a034312 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -44,12 +44,14 @@ #include "src/slurmdbd/read_config.h" static List local_association_list = NULL; +static List local_qos_list = NULL; static List local_user_list = NULL; static char *local_cluster_name = NULL; void (*remove_assoc_notify) (acct_association_rec_t *rec) = NULL; static pthread_mutex_t local_association_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_mutex_t local_qos_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t local_user_lock = PTHREAD_MUTEX_INITIALIZER; /* locks should be put in place before calling this function */ @@ -144,6 +146,28 @@ static int _get_local_association_list(void *db_conn, int enforce) return SLURM_SUCCESS; } +static int _get_local_qos_list(void *db_conn, int enforce) +{ + slurm_mutex_lock(&local_qos_lock); + if(local_qos_list) + list_destroy(local_qos_list); + local_qos_list = acct_storage_g_get_qos(db_conn, NULL); + + if(!local_qos_list) { + slurm_mutex_unlock(&local_qos_lock); + if(enforce) { + error("_get_local_qos_list: " + "no list was made."); + return SLURM_ERROR; + } else { + return SLURM_SUCCESS; + } + } + + slurm_mutex_unlock(&local_qos_lock); + return SLURM_SUCCESS; +} + static int _get_local_user_list(void *db_conn, int enforce) { acct_user_cond_t user_q; @@ -204,6 +228,10 @@ extern int assoc_mgr_init(void *db_conn, assoc_init_args_t *args) if(_get_local_association_list(db_conn, enforce) == SLURM_ERROR) return SLURM_ERROR; + if(!local_qos_list) + if(_get_local_qos_list(db_conn, enforce) == SLURM_ERROR) + return SLURM_ERROR; + if(!local_user_list) if(_get_local_user_list(db_conn, enforce) == SLURM_ERROR) return SLURM_ERROR; @@ -215,10 +243,13 @@ extern int assoc_mgr_fini(void) { if(local_association_list) 
list_destroy(local_association_list); + if(local_qos_list) + list_destroy(local_qos_list); if(local_user_list) list_destroy(local_user_list); xfree(local_cluster_name); local_association_list = NULL; + local_qos_list = NULL; local_user_list = NULL; return SLURM_SUCCESS; @@ -293,14 +324,14 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc, "nonuser association"); continue; } else if(assoc->uid != found_assoc->uid) { - debug3("not the right user %u != %u", + debug4("not the right user %u != %u", assoc->uid, found_assoc->uid); continue; } if(found_assoc->acct && strcasecmp(assoc->acct, found_assoc->acct)) { - debug3("not the right account %s != %s", + debug4("not the right account %s != %s", assoc->acct, found_assoc->acct); continue; } @@ -309,7 +340,7 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc, if(!local_cluster_name && found_assoc->cluster && strcasecmp(assoc->cluster, found_assoc->cluster)) { - debug3("not the right cluster"); + debug4("not the right cluster"); continue; } @@ -491,11 +522,11 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) continue; } else { if(!object->user && rec->user) { - debug3("we are looking for a " + debug4("we are looking for a " "nonuser association"); continue; } else if(object->uid != rec->uid) { - debug3("not the right user"); + debug4("not the right user"); continue; } @@ -503,7 +534,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) && (!rec->acct || strcasecmp(object->acct, rec->acct))) { - debug3("not the right account"); + debug4("not the right account"); continue; } @@ -512,7 +543,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) && (!rec->cluster || strcasecmp(object->cluster, rec->cluster))) { - debug3("not the right cluster"); + debug4("not the right cluster"); continue; } @@ -520,7 +551,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) && 
(!rec->partition || strcasecmp(object->partition, rec->partition))) { - debug3("not the right partition"); + debug4("not the right partition"); continue; } break; @@ -655,9 +686,13 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) object->default_acct = NULL; } - if(object->qos != ACCT_QOS_NOTSET) - rec->qos = object->qos; - + if(object->qos_list) { + if(rec->qos_list) + list_destroy(rec->qos_list); + rec->qos_list = object->qos_list; + object->qos_list = NULL; + } + if(object->admin_level != ACCT_ADMIN_NOTSET) rec->admin_level = rec->admin_level; @@ -674,6 +709,7 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) object->uid = NO_VAL; list_append(local_user_list, object); + break; case ACCT_REMOVE_USER: if(!rec) { //rc = SLURM_ERROR; @@ -712,6 +748,55 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) return rc; } +extern int assoc_mgr_update_local_qos(acct_update_object_t *update) +{ + acct_qos_rec_t * rec = NULL; + acct_qos_rec_t * object = NULL; + + ListIterator itr = NULL; + int rc = SLURM_SUCCESS; + + if(!local_qos_list) + return SLURM_SUCCESS; + + slurm_mutex_lock(&local_qos_lock); + itr = list_iterator_create(local_qos_list); + while((object = list_pop(update->objects))) { + list_iterator_reset(itr); + while((rec = list_next(itr))) { + if(object->id == rec->id) { + break; + } + } + //info("%d qos %s", update->type, object->name); + switch(update->type) { + case ACCT_ADD_QOS: + if(rec) { + //rc = SLURM_ERROR; + break; + } + list_append(local_qos_list, object); + break; + case ACCT_REMOVE_QOS: + if(!rec) { + //rc = SLURM_ERROR; + break; + } + list_delete_item(itr); + break; + default: + break; + } + if(update->type != ACCT_ADD_QOS) { + destroy_acct_qos_rec(object); + } + } + list_iterator_destroy(itr); + slurm_mutex_unlock(&local_qos_lock); + + return rc; +} + extern int assoc_mgr_validate_assoc_id(void *db_conn, uint32_t assoc_id, int enforce) diff --git a/src/common/assoc_mgr.h 
b/src/common/assoc_mgr.h index 512b753d4..df9b26f9b 100644 --- a/src/common/assoc_mgr.h +++ b/src/common/assoc_mgr.h @@ -107,6 +107,13 @@ extern int assoc_mgr_fini(void); */ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update); +/* + * update qos in local cache + * IN: acct_update_object_t *object + * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else + */ +extern int assoc_mgr_update_local_qos(acct_update_object_t *update); + /* * update users in local cache * IN: acct_update_object_t *object diff --git a/src/common/jobacct_common.c b/src/common/jobacct_common.c index 847e171f5..0529d9db4 100644 --- a/src/common/jobacct_common.c +++ b/src/common/jobacct_common.c @@ -197,8 +197,6 @@ extern void destroy_jobacct_selected_step(void *object) { jobacct_selected_step_t *step = (jobacct_selected_step_t *)object; if (step) { - xfree(step->job); - xfree(step->step); xfree(step); } } @@ -380,8 +378,6 @@ unpack_error: extern void pack_jobacct_selected_step(jobacct_selected_step_t *step, Buf buffer) { - packstr(step->job, buffer); - packstr(step->step, buffer); pack32(step->jobid, buffer); pack32(step->stepid, buffer); } @@ -389,14 +385,11 @@ extern void pack_jobacct_selected_step(jobacct_selected_step_t *step, extern int unpack_jobacct_selected_step(jobacct_selected_step_t **step, Buf buffer) { - uint32_t uint32_tmp; jobacct_selected_step_t *step_ptr = xmalloc(sizeof(jobacct_selected_step_t)); *step = step_ptr; - safe_unpackstr_xmalloc(&step_ptr->job, &uint32_tmp, buffer); - safe_unpackstr_xmalloc(&step_ptr->step, &uint32_tmp, buffer); safe_unpack32(&step_ptr->jobid, buffer); safe_unpack32(&step_ptr->stepid, buffer); @@ -830,6 +823,8 @@ extern struct jobacctinfo *jobacct_common_stat_task(pid_t pid) break; } list_iterator_destroy(itr); + if (jobacct == NULL) + goto error; ret_jobacct = xmalloc(sizeof(struct jobacctinfo)); memcpy(ret_jobacct, jobacct, sizeof(struct jobacctinfo)); error: diff --git a/src/common/jobacct_common.h 
b/src/common/jobacct_common.h index bb373fd9d..af391dd23 100644 --- a/src/common/jobacct_common.h +++ b/src/common/jobacct_common.h @@ -64,6 +64,7 @@ #include <ctype.h> #define BUFFER_SIZE 4096 +#define FDUMP_FLAG 0x04 typedef struct { uint16_t taskid; /* contains which task number it was on */ @@ -86,30 +87,34 @@ typedef struct { } sacct_t; typedef struct { - char *opt_cluster; /* --cluster */ + List opt_acct_list; /* --accounts list of char * */ + int opt_begin; /* --begin */ + List opt_cluster_list; /* --cluster list of char * */ int opt_completion; /* --completion */ int opt_dump; /* --dump */ int opt_dup; /* --duplicates; +1 = explicitly set */ + int opt_end; /* --end */ + long opt_expire; /* --expire= */ + char *opt_expire_timespec; /* --expire= */ int opt_fdump; /* --formattted_dump */ - int opt_stat; /* --stat */ - int opt_gid; /* --gid (-1=wildcard, 0=root) */ - int opt_header; /* can only be cleared */ + char *opt_field_list; /* --fields= */ + char *opt_filein; /* --file */ + int opt_gid; /* running persons gid */ + List opt_gid_list; /* --gid list of char * */ int opt_help; /* --help */ + List opt_job_list; /* --jobs */ int opt_long; /* --long */ int opt_lowmem; /* --low_memory */ - int opt_raw; /* --raw */ + int opt_noheader; /* can only be cleared */ + List opt_partition_list;/* --partitions */ int opt_purge; /* --purge */ + int opt_raw; /* --raw */ + int opt_stat; /* --stat */ + List opt_state_list; /* --states */ int opt_total; /* --total */ - int opt_uid; /* --uid (-1=wildcard, 0=root) */ - int opt_uid_set; + int opt_uid; /* running persons uid */ + List opt_uid_list; /* --uid list of char * */ int opt_verbose; /* --verbose */ - long opt_expire; /* --expire= */ - char *opt_expire_timespec; /* --expire= */ - char *opt_field_list; /* --fields= */ - char *opt_filein; /* --file */ - char *opt_job_list; /* --jobs */ - char *opt_partition_list;/* --partitions */ - char *opt_state_list; /* --states */ } sacct_parameters_t; typedef struct { @@ -179,8 +184,6 
@@ typedef struct { } jobacct_step_rec_t; typedef struct selected_step_t { - char *job; - char *step; uint32_t jobid; uint32_t stepid; } jobacct_selected_step_t; diff --git a/src/common/pack.c b/src/common/pack.c index d48f3c041..07863aa8c 100644 --- a/src/common/pack.c +++ b/src/common/pack.c @@ -2,7 +2,8 @@ * pack.c - lowest level un/pack functions * NOTE: The memory buffer will expand as needed using xrealloc() ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Jim Garlick <garlick@llnl.gov>, * Morris Jette <jette1@llnl.gov>, et. al. @@ -93,6 +94,11 @@ Buf create_buf(char *data, int size) { Buf my_buf; + if (size > MAX_BUF_SIZE) { + error("create_buf: buffer size too large"); + return NULL; + } + my_buf = xmalloc(sizeof(struct slurm_buf)); my_buf->magic = BUF_MAGIC; my_buf->size = size; @@ -114,6 +120,11 @@ void free_buf(Buf my_buf) /* Grow a buffer by the specified amount */ void grow_buf (Buf buffer, int size) { + if (buffer->size > (MAX_BUF_SIZE - size)) { + error("grow_buf: buffer size too large"); + return; + } + buffer->size += size; xrealloc(buffer->head, buffer->size); } @@ -123,6 +134,11 @@ Buf init_buf(int size) { Buf my_buf; + if (size > MAX_BUF_SIZE) { + error("init_buf: buffer size too large"); + return NULL; + } + my_buf = xmalloc(sizeof(struct slurm_buf)); my_buf->magic = BUF_MAGIC; my_buf->size = size; @@ -153,6 +169,10 @@ void pack_time(time_t val, Buf buffer) int64_t n64 = HTON_int64((int64_t) val); if (remaining_buf(buffer) < sizeof(n64)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack_time: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -184,6 +204,10 @@ 
void pack64(uint64_t val, Buf buffer) uint64_t nl = HTON_uint64(val); if (remaining_buf(buffer) < sizeof(nl)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack64: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -217,6 +241,10 @@ void pack32(uint32_t val, Buf buffer) uint32_t nl = htonl(val); if (remaining_buf(buffer) < sizeof(nl)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack32: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -308,6 +336,10 @@ void pack16(uint16_t val, Buf buffer) uint16_t ns = htons(val); if (remaining_buf(buffer) < sizeof(ns)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack16: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -340,6 +372,10 @@ int unpack16(uint16_t * valp, Buf buffer) void pack8(uint8_t val, Buf buffer) { if (remaining_buf(buffer) < sizeof(uint8_t)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack8: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -372,6 +408,10 @@ void packmem(char *valp, uint32_t size_val, Buf buffer) uint32_t ns = htonl(size_val); if (remaining_buf(buffer) < (sizeof(ns) + size_val)) { + if (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE)) { + error("packmem: buffer size too large"); + return; + } buffer->size += (size_val + BUF_SIZE); xrealloc(buffer->head, buffer->size); } @@ -520,6 +560,10 @@ void packstr_array(char **valp, uint32_t size_val, Buf buffer) uint32_t ns = htonl(size_val); if (remaining_buf(buffer) < sizeof(ns)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("packstr_array: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -574,6 +618,10 @@ int unpackstr_array(char ***valp, uint32_t * size_valp, Buf buffer) void 
packmem_array(char *valp, uint32_t size_val, Buf buffer) { if (remaining_buf(buffer) < size_val) { + if (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE)) { + error("packmem_array: buffer size too large"); + return; + } buffer->size += (size_val + BUF_SIZE); xrealloc(buffer->head, buffer->size); } diff --git a/src/common/pack.h b/src/common/pack.h index 3e537b6e0..40dad8a80 100644 --- a/src/common/pack.h +++ b/src/common/pack.h @@ -2,7 +2,8 @@ * pack.h - definitions for lowest level un/pack functions. all functions * utilize a Buf structure. Call init_buf, un/pack, and free_buf ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Kevin Tew <tew1@llnl.gov>, Morris Jette <jette1@llnl.gov>, et. al. * LLNL-CODE-402394. 
@@ -58,6 +59,7 @@ #define BUF_MAGIC 0x42554545 #define BUF_SIZE (16 * 1024) +#define MAX_BUF_SIZE ((uint32_t) 0xffff0000) /* avoid going over 32-bits */ struct slurm_buf { uint32_t magic; diff --git a/src/common/plugin.c b/src/common/plugin.c index 143e82ee0..cab64e29b 100644 --- a/src/common/plugin.c +++ b/src/common/plugin.c @@ -301,7 +301,7 @@ plugin_get_syms( plugin_handle_t plug, if ( ptrs[ i ] ) ++count; else - debug4("Couldn't find sym '%s' in the plugin", + debug3("Couldn't find sym '%s' in the plugin", names[ i ]); } diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 09e9899df..831c5344a 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -63,6 +63,7 @@ struct spank_plugin_operations { spank_f *init; + spank_f *init_post_opt; spank_f *local_user_init; spank_f *user_init; spank_f *user_task_init; @@ -71,9 +72,10 @@ struct spank_plugin_operations { spank_f *exit; }; -const int n_spank_syms = 7; +const int n_spank_syms = 8; const char *spank_syms[] = { "slurm_spank_init", + "slurm_spank_init_post_opt", "slurm_spank_local_user_init", "slurm_spank_user_init", "slurm_spank_task_init", @@ -131,6 +133,7 @@ typedef enum spank_handle_type { */ typedef enum step_fn { SPANK_INIT = 0, + SPANK_INIT_POST_OPT, LOCAL_USER_INIT, STEP_USER_INIT, STEP_USER_TASK_INIT, @@ -449,6 +452,8 @@ static const char *_step_fn_name(step_fn_t type) switch (type) { case SPANK_INIT: return ("init"); + case SPANK_INIT_POST_OPT: + return ("init_post_opt"); case LOCAL_USER_INIT: return ("local_user_init"); case STEP_USER_INIT: @@ -498,6 +503,14 @@ static int _do_call_stack(step_fn_t type, void * job, int taskid) fn_name, rc); } break; + case SPANK_INIT_POST_OPT: + if (sp->ops.init_post_opt) { + rc = (*sp->ops.init_post_opt) (spank, sp->ac, + sp->argv); + debug2("spank: %s: %s = %d\n", name, + fn_name, rc); + } + break; case LOCAL_USER_INIT: if (sp->ops.local_user_init) { rc = (*sp->ops.local_user_init) (spank, sp->ac, @@ -586,6 +599,9 @@ int 
spank_init(slurmd_job_t * job) return (-1); } + if (_do_call_stack(SPANK_INIT_POST_OPT, job, -1) < 0) + return (-1); + return (0); } @@ -1110,7 +1126,7 @@ global_to_local_id (slurmd_job_t *job, uint32_t gid, uint32_t *p2uint32) /* * Return 1 if spank_item_t is valid for S_TYPE_LOCAL */ -static int valid_in_local_context (spank_item_t item) +static int _valid_in_local_context (spank_item_t item) { int rc = 0; switch (item) { @@ -1130,6 +1146,24 @@ static int valid_in_local_context (spank_item_t item) return (rc); } +/* + * Return 1 if spank_item_t is just getting version (valid anywhere) + */ +static int _version_check (spank_item_t item) +{ + int rc = 0; + switch (item) { + case S_SLURM_VERSION: + case S_SLURM_VERSION_MAJOR: + case S_SLURM_VERSION_MINOR: + case S_SLURM_VERSION_MICRO: + rc = 1; + break; + default: + rc = 0; + } + return (rc); +} /* * Global functions for SPANK plugins @@ -1172,6 +1206,7 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) pid_t *p2pid; pid_t pid; char ***p2argv; + char **p2vers; slurmd_task_info_t *task; slurmd_job_t *slurmd_job = NULL; struct spank_launcher_job_info *launcher_job = NULL; @@ -1181,17 +1216,20 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) 
if ((spank == NULL) || (spank->magic != SPANK_MAGIC)) return (ESPANK_BAD_ARG); - if ( (spank->type != S_TYPE_REMOTE) - && (!valid_in_local_context(item))) - return (ESPANK_NOT_REMOTE); + if (!_version_check(item)) { + /* Need job pointer to process other items */ + if ( (spank->type != S_TYPE_REMOTE) + && (!_valid_in_local_context(item))) + return (ESPANK_NOT_REMOTE); - if (spank->job == NULL) - return (ESPANK_BAD_ARG); + if (spank->job == NULL) + return (ESPANK_BAD_ARG); - if (spank->type == S_TYPE_LOCAL) - launcher_job = spank->job; - else - slurmd_job = spank->job; + if (spank->type == S_TYPE_LOCAL) + launcher_job = spank->job; + else + slurmd_job = spank->job; + } va_start(vargs, item); switch (item) { @@ -1255,6 +1293,10 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) p2uint16 = va_arg(vargs, uint16_t *); *p2uint16 = slurmd_job->cpus; break; + case S_STEP_CPUS_PER_TASK: + p2uint32 = va_arg(vargs, uint32_t *); + *p2uint32 = slurmd_job->cpus_per_task; + break; case S_JOB_ARGV: p2int = va_arg(vargs, int *); p2argv = va_arg(vargs, char ***); @@ -1343,6 +1385,22 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) 
p2uint32 = va_arg(vargs, uint32_t *); rc = global_to_local_id (slurmd_job, uint32, p2uint32); break; + case S_SLURM_VERSION: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_VERSION; + break; + case S_SLURM_VERSION_MAJOR: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_MAJOR; + break; + case S_SLURM_VERSION_MINOR: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_MINOR; + break; + case S_SLURM_VERSION_MICRO: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_MICRO; + break; default: rc = ESPANK_BAD_ARG; break; diff --git a/src/common/print_fields.c b/src/common/print_fields.c index 398a8597f..cb5d3ed85 100644 --- a/src/common/print_fields.c +++ b/src/common/print_fields.c @@ -41,6 +41,18 @@ int print_fields_parsable_print = 0; int print_fields_have_header = 1; +static int _sort_char_list(char *name_a, char *name_b) +{ + int diff = strcmp(name_a, name_b); + + if (diff < 0) + return -1; + else if (diff > 0) + return 1; + + return 0; +} + extern void destroy_print_field(void *object) { print_field_t *field = (print_field_t *)object; @@ -54,242 +66,151 @@ extern void destroy_print_field(void *object) extern void print_fields_header(List print_fields_list) { ListIterator itr = NULL; - print_field_t *object = NULL; + print_field_t *field = NULL; if(!print_fields_list || !print_fields_have_header) return; itr = list_iterator_create(print_fields_list); - while((object = list_next(itr))) { - (object->print_routine)(SLURM_PRINT_HEADLINE, object, 0); + while((field = list_next(itr))) { + if(print_fields_parsable_print) + printf("%s|", field->name); + else + printf("%-*.*s ", field->len, field->len, field->name); } list_iterator_reset(itr); printf("\n"); if(print_fields_parsable_print) return; - while((object = list_next(itr))) { - (object->print_routine)(SLURM_PRINT_UNDERSCORE, object, 0); + while((field = list_next(itr))) { + printf("%-*.*s ", field->len, field->len, + "-----------------------------------------------------"); } list_iterator_destroy(itr); 
printf("\n"); } -extern void print_fields_date(void) +extern void print_fields_date(print_field_t *field, time_t value) { - time_t now; - - now = time(NULL); - printf("%s", ctime(&now)); + char temp_char[field->len]; + time_t now = value; + if(!now) + now = time(NULL); + slurm_make_time_str(&value, (char *)temp_char, field->len); + if(print_fields_parsable_print) + printf("%s|", temp_char); + else + printf("%-*.*s ", field->len, field->len, temp_char); } -extern void print_fields_str(type_t type, print_field_t *field, char *value) +extern void print_fields_str(print_field_t *field, char *value) { - char *print_this = value; + char temp_char[field->len]; + char *print_this = NULL; - switch(type) { - case SLURM_PRINT_HEADLINE: + if(!value) { if(print_fields_parsable_print) - printf("%s|", field->name); + print_this = ""; else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: + print_this = " "; + } + + if(print_fields_parsable_print) + printf("%s|", value); + else { if(!print_this) { - if(print_fields_parsable_print) - print_this = ""; - else - print_this = " "; - } - - if(print_fields_parsable_print) - printf("%s|", print_this); - else { - if(strlen(print_this) > field->len) - print_this[field->len-1] = '+'; - - printf("%-*.*s ", field->len, field->len, print_this); + memcpy(&temp_char, value, field->len); + + if(strlen(value) > field->len) + temp_char[field->len-1] = '+'; + print_this = temp_char; } - break; - default: - if(print_fields_parsable_print) - printf("%s|", "n/a"); - else - printf("%-*s ", field->len, "n/a"); - break; + printf("%-*.*s ", field->len, field->len, print_this); } } -extern void print_fields_uint32(type_t type, print_field_t *field, - uint32_t value) +extern void print_fields_uint32(print_field_t *field, uint32_t value) { - switch(type) { 
- case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); - else - printf("%*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%*s ", field->len, " "); - } else { - if(print_fields_parsable_print) - printf("%u|", value); - else - printf("%*u ", field->len, value); - } - break; - default: + printf("|"); + else + printf("%*s ", field->len, " "); + } else { if(print_fields_parsable_print) - printf("%s|", "n/a"); + printf("%u|", value); else - printf("%*.*s ", field->len, field->len, "n/a"); - break; + printf("%*u ", field->len, value); } } -extern void print_fields_uint64(type_t type, print_field_t *field, - uint64_t value) +extern void print_fields_uint64(print_field_t *field, uint64_t value) { - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); - else - printf("%*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%*s ", field->len, " "); - } else { - if(print_fields_parsable_print) - printf("%llu|", (long long unsigned) value); - else - printf("%*llu ", field->len, - (long long unsigned) 
value); - } - break; - default: + printf("|"); + else + printf("%*s ", field->len, " "); + } else { if(print_fields_parsable_print) - printf("%s|", "n/a"); + printf("%llu|", (long long unsigned) value); else - printf("%*.*s ", field->len, field->len, "n/a"); - break; + printf("%*llu ", field->len, + (long long unsigned) value); } } -extern void print_fields_time(type_t type, print_field_t *field, uint32_t value) +extern void print_fields_time(print_field_t *field, uint32_t value) { - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); + printf("|"); else - printf("%*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%*s ", field->len, " "); - } else { - char time_buf[32]; - mins2time_str((time_t) value, - time_buf, sizeof(time_buf)); - if(print_fields_parsable_print) - printf("%s|", time_buf); - else - printf("%*s ", field->len, time_buf); - } - break; - default: - printf("%*.*s ", field->len, field->len, "n/a"); - break; + printf("%*s ", field->len, " "); + } else { + char time_buf[32]; + mins2time_str((time_t) value, time_buf, sizeof(time_buf)); + if(print_fields_parsable_print) + printf("%s|", time_buf); + else + printf("%*s ", field->len, time_buf); } } -extern void print_fields_char_list(type_t type, print_field_t *field, - List value) +extern void print_fields_char_list(print_field_t *field, List value) { ListIterator itr = NULL; char *print_this = NULL; char *object = NULL; - switch(type) { - case SLURM_PRINT_HEADLINE: + if(!value || !list_count(value)) { 
if(print_fields_parsable_print) - printf("%s|", field->name); + print_this = xstrdup(""); else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - if(!value || !list_count(value)) { - if(print_fields_parsable_print) - print_this = xstrdup(""); - else - print_this = xstrdup(" "); - } else { - itr = list_iterator_create(value); - while((object = list_next(itr))) { - if(print_this) - xstrfmtcat(print_this, ",%s", object); - else - print_this = xstrdup(object); - } - list_iterator_destroy(itr); - } - - if(print_fields_parsable_print) - printf("%s|", print_this); - else { - if(strlen(print_this) > field->len) - print_this[field->len-1] = '+'; - - printf("%-*.*s ", field->len, field->len, print_this); + print_this = xstrdup(" "); + } else { + list_sort(value, (ListCmpF)_sort_char_list); + itr = list_iterator_create(value); + while((object = list_next(itr))) { + if(print_this) + xstrfmtcat(print_this, ",%s", object); + else + print_this = xstrdup(object); } - xfree(print_this); - break; - default: - if(print_fields_parsable_print) - printf("%s|", "n/a"); - else - printf("%-*s ", field->len, "n/a"); - break; + list_iterator_destroy(itr); + } + + if(print_fields_parsable_print) + printf("%s|", print_this); + else { + if(strlen(print_this) > field->len) + print_this[field->len-1] = '+'; + + printf("%-*.*s ", field->len, field->len, print_this); } + xfree(print_this); } diff --git a/src/common/print_fields.h b/src/common/print_fields.h index 9264970a6..37ade380f 100644 --- a/src/common/print_fields.h +++ b/src/common/print_fields.h @@ -67,12 +67,6 @@ #include "src/common/xstring.h" #include "src/common/list.h" -typedef enum { - SLURM_PRINT_HEADLINE, - SLURM_PRINT_UNDERSCORE, - SLURM_PRINT_VALUE -} type_t; - typedef struct { uint16_t len; /* what is the width of the print 
*/ char *name; /* name to be printed in header */ @@ -85,16 +79,12 @@ extern int print_fields_have_header; extern void destroy_print_field(void *object); extern void print_fields_header(List print_fields_list); -extern void print_fields_date(void); -extern void print_fields_str(type_t type, print_field_t *field, char *value); -extern void print_fields_uint32(type_t type, print_field_t *field, - uint32_t value); -extern void print_fields_uint64(type_t type, print_field_t *field, - uint64_t value); -extern void print_fields_time(type_t type, print_field_t *field, - uint32_t value); -extern void print_fields_char_list(type_t type, print_field_t *field, - List value); +extern void print_fields_date(print_field_t *field, time_t value); +extern void print_fields_str(print_field_t *field, char *value); +extern void print_fields_uint32(print_field_t *field, uint32_t value); +extern void print_fields_uint64(print_field_t *field, uint64_t value); +extern void print_fields_time(print_field_t *field, uint32_t value); +extern void print_fields_char_list(print_field_t *field, List value); #define print_fields_uint print_fields_uint32 #endif diff --git a/src/common/read_config.c b/src/common/read_config.c index 724caa3e1..2faba8f6a 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -146,7 +146,9 @@ s_p_options_t slurm_conf_options[] = { {"DefaultStoragePort", S_P_UINT32}, {"DefaultStorageType", S_P_STRING}, {"DefaultStorageUser", S_P_STRING}, - {"DefMemPerTask", S_P_UINT32}, + {"DefMemPerCPU", S_P_UINT32}, + {"DefMemPerNode", S_P_UINT32}, + {"DefMemPerTask", S_P_UINT32}, /* defunct */ {"DisableRootJobs", S_P_BOOLEAN}, {"EnforcePartLimits", S_P_BOOLEAN}, {"Epilog", S_P_STRING}, @@ -179,14 +181,16 @@ s_p_options_t slurm_conf_options[] = { {"Licenses", S_P_STRING}, {"MailProg", S_P_STRING}, {"MaxJobCount", S_P_UINT16}, - {"MaxMemPerTask", S_P_UINT32}, + {"MaxMemPerCPU", S_P_UINT32}, + {"MaxMemPerNode", S_P_UINT32}, + {"MaxMemPerTask", S_P_UINT32}, /* defunct */ 
{"MessageTimeout", S_P_UINT16}, {"MinJobAge", S_P_UINT16}, {"MpichGmDirectSupport", S_P_LONG, defunct_option}, {"MpiDefault", S_P_STRING}, {"PluginDir", S_P_STRING}, {"PlugStackConfig", S_P_STRING}, - {"PrivateData", S_P_UINT16}, + {"PrivateData", S_P_STRING}, {"ProctrackType", S_P_STRING}, {"Prolog", S_P_STRING}, {"PropagatePrioProcess", S_P_UINT16}, @@ -1551,7 +1555,7 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_string(&default_storage_host, "DefaultStorageHost", hashtbl); s_p_get_string(&default_storage_user, "DefaultStorageUser", hashtbl); s_p_get_string(&default_storage_pass, "DefaultStoragePass", hashtbl); - s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl); + s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl); s_p_get_uint32(&default_storage_port, "DefaultStoragePort", hashtbl); if (!s_p_get_string(&conf->job_credential_private_key, @@ -1577,8 +1581,11 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->crypto_type, "CryptoType", hashtbl)) conf->crypto_type = xstrdup(DEFAULT_CRYPTO_TYPE); - if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl)) - conf->def_mem_per_task = DEFAULT_MEM_PER_TASK; + if ((s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerCPU", hashtbl)) || + (s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl))) + conf->def_mem_per_task |= MEM_PER_CPU; + else if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerNode", hashtbl)) + conf->def_mem_per_task = DEFAULT_MEM_PER_CPU; if (!s_p_get_boolean((bool *) &conf->disable_root_jobs, "DisableRootJobs", hashtbl)) @@ -1586,7 +1593,7 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_boolean((bool *) &conf->enforce_part_limits, "EnforcePartLimits", hashtbl)) - conf->disable_root_jobs = DEFAULT_ENFORCE_PART_LIMITS; + conf->enforce_part_limits = DEFAULT_ENFORCE_PART_LIMITS; s_p_get_string(&conf->epilog, "Epilog", 
hashtbl); @@ -1640,11 +1647,21 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) xstrdup(DEFAULT_JOB_ACCT_GATHER_TYPE); if (!s_p_get_string(&conf->job_comp_type, "JobCompType", hashtbl)) { - if(default_storage_type) - conf->job_comp_type = - xstrdup_printf("jobcomp/%s", - default_storage_type); - else + if(default_storage_type) { + if(!strcasecmp("slurmdbd", default_storage_type)) { + error("Can not use the default storage type " + "specified for jobcomp since there is " + "not slurmdbd type. We are using %s " + "as the type. To disable this message " + "set JobCompType in your slurm.conf", + DEFAULT_JOB_COMP_TYPE); + conf->job_comp_type = + xstrdup(DEFAULT_JOB_COMP_TYPE); + } else + conf->job_comp_type = + xstrdup_printf("jobcomp/%s", + default_storage_type); + } else conf->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE); } if (!s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl)) { @@ -1708,8 +1725,11 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl)) conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT; - if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl)) - conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_TASK; + if ((s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerCPU", hashtbl)) || + (s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl))) + conf->max_mem_per_task |= MEM_PER_CPU; + else if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerNode", hashtbl)) + conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_CPU; if (!s_p_get_uint16(&conf->msg_timeout, "MessageTimeout", hashtbl)) conf->msg_timeout = DEFAULT_MSG_TIMEOUT; @@ -1807,7 +1827,17 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) && (!strcmp(conf->proctrack_type,"proctrack/linuxproc"))) fatal("proctrack/linuxproc is incompatable with switch/elan"); - s_p_get_uint16(&conf->private_data, "PrivateData", hashtbl); + if 
(s_p_get_string(&temp_str, "PrivateData", hashtbl)) { + if (strstr(temp_str, "job")) + conf->private_data |= PRIVATE_DATA_JOBS; + if (strstr(temp_str, "node")) + conf->private_data |= PRIVATE_DATA_NODES; + if (strstr(temp_str, "partition")) + conf->private_data |= PRIVATE_DATA_PARTITIONS; + if (strstr(temp_str, "all")) + conf->private_data = 0xffff; + xfree(temp_str); + } s_p_get_string(&conf->prolog, "Prolog", hashtbl); diff --git a/src/common/read_config.h b/src/common/read_config.h index cddc90068..c64361b4c 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -73,8 +73,8 @@ extern char *default_plugstack; #define DEFAULT_KILL_WAIT 30 #define DEFAULT_MAIL_PROG "/bin/mail" #define DEFAULT_MAX_JOB_COUNT 5000 -#define DEFAULT_MEM_PER_TASK 0 -#define DEFAULT_MAX_MEM_PER_TASK 0 +#define DEFAULT_MEM_PER_CPU 0 +#define DEFAULT_MAX_MEM_PER_CPU 0 #define DEFAULT_MIN_JOB_AGE 300 #define DEFAULT_MPI_DEFAULT "none" #define DEFAULT_MSG_TIMEOUT 10 diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index 597a7ee5b..6265ce45a 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -65,44 +65,52 @@ typedef struct slurm_acct_storage_ops { List user_list); int (*add_coord) (void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); int (*add_accts) (void *db_conn, uint32_t uid, List acct_list); int (*add_clusters) (void *db_conn, uint32_t uid, List cluster_list); int (*add_associations) (void *db_conn, uint32_t uid, List association_list); + int (*add_qos) (void *db_conn, uint32_t uid, + List qos_list); List (*modify_users) (void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, + acct_user_cond_t *user_cond, acct_user_rec_t *user); List (*modify_accts) (void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct); List (*modify_clusters) (void *db_conn, uint32_t uid, - 
acct_cluster_cond_t *cluster_q, + acct_cluster_cond_t *cluster_cond, acct_cluster_rec_t *cluster); List (*modify_associations)(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, + acct_association_cond_t *assoc_cond, acct_association_rec_t *assoc); List (*remove_users) (void *db_conn, uint32_t uid, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); List (*remove_coord) (void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); List (*remove_accts) (void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); List (*remove_clusters) (void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q); + acct_cluster_cond_t *cluster_cond); List (*remove_associations)(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q); + acct_association_cond_t *assoc_cond); + List (*remove_qos) (void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond); List (*get_users) (void *db_conn, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); List (*get_accts) (void *db_conn, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); List (*get_clusters) (void *db_conn, - acct_cluster_cond_t *cluster_q); + acct_cluster_cond_t *cluster_cond); List (*get_associations) (void *db_conn, - acct_association_cond_t *assoc_q); + acct_association_cond_t *assoc_cond); + List (*get_qos) (void *db_conn, + acct_qos_cond_t *qos_cond); + List (*get_txn) (void *db_conn, + acct_txn_cond_t *txn_cond); int (*get_usage) (void *db_conn, void *acct_assoc, time_t start, @@ -160,7 +168,7 @@ typedef struct slurm_acct_storage_context { static slurm_acct_storage_context_t * g_acct_storage_context = NULL; static pthread_mutex_t g_acct_storage_context_lock = - PTHREAD_MUTEX_INITIALIZER; + PTHREAD_MUTEX_INITIALIZER; /* * Local functions @@ -190,6 +198,7 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops( "acct_storage_p_add_accts", "acct_storage_p_add_clusters", 
"acct_storage_p_add_associations", + "acct_storage_p_add_qos", "acct_storage_p_modify_users", "acct_storage_p_modify_accounts", "acct_storage_p_modify_clusters", @@ -199,10 +208,13 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops( "acct_storage_p_remove_accts", "acct_storage_p_remove_clusters", "acct_storage_p_remove_associations", + "acct_storage_p_remove_qos", "acct_storage_p_get_users", "acct_storage_p_get_accts", "acct_storage_p_get_clusters", "acct_storage_p_get_associations", + "acct_storage_p_get_qos", + "acct_storage_p_get_txn", "acct_storage_p_get_usage", "acct_storage_p_roll_usage", "clusteracct_storage_p_node_down", @@ -254,7 +266,7 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops( c->acct_storage_type ); if ( c->cur_plugin == PLUGIN_INVALID_HANDLE ) { error( "cannot find accounting_storage plugin for %s", - c->acct_storage_type ); + c->acct_storage_type ); return NULL; } @@ -326,6 +338,8 @@ extern void destroy_acct_user_rec(void *object) list_destroy(acct_user->coord_accts); xfree(acct_user->default_acct); xfree(acct_user->name); + if(acct_user->qos_list) + list_destroy(acct_user->qos_list); xfree(acct_user); } } @@ -343,6 +357,8 @@ extern void destroy_acct_account_rec(void *object) xfree(acct_account->description); xfree(acct_account->name); xfree(acct_account->organization); + if(acct_account->qos_list) + list_destroy(acct_account->qos_list); xfree(acct_account); } } @@ -409,6 +425,27 @@ extern void destroy_acct_association_rec(void *object) } } +extern void destroy_acct_qos_rec(void *object) +{ + acct_qos_rec_t *acct_qos = (acct_qos_rec_t *)object; + if(acct_qos) { + xfree(acct_qos->description); + xfree(acct_qos->name); + xfree(acct_qos); + } +} + +extern void destroy_acct_txn_rec(void *object) +{ + acct_txn_rec_t *acct_txn = (acct_txn_rec_t *)object; + if(acct_txn) { + xfree(acct_txn->actor_name); + xfree(acct_txn->set_info); + xfree(acct_txn->where_query); + xfree(acct_txn); + } +} + extern void destroy_acct_user_cond(void 
*object) { acct_user_cond_t *acct_user = (acct_user_cond_t *)object; @@ -417,8 +454,8 @@ extern void destroy_acct_user_cond(void *object) destroy_acct_association_cond(acct_user->assoc_cond); if(acct_user->def_acct_list) list_destroy(acct_user->def_acct_list); - if(acct_user->user_list) - list_destroy(acct_user->user_list); + if(acct_user->qos_list) + list_destroy(acct_user->qos_list); xfree(acct_user); } } @@ -429,13 +466,13 @@ extern void destroy_acct_account_cond(void *object) (acct_account_cond_t *)object; if(acct_account) { - if(acct_account->acct_list) - list_destroy(acct_account->acct_list); destroy_acct_association_cond(acct_account->assoc_cond); if(acct_account->description_list) list_destroy(acct_account->description_list); if(acct_account->organization_list) list_destroy(acct_account->organization_list); + if(acct_account->qos_list) + list_destroy(acct_account->qos_list); xfree(acct_account); } } @@ -491,12 +528,40 @@ extern void destroy_acct_job_cond(void *object) list_destroy(job_cond->partition_list); if(job_cond->step_list) list_destroy(job_cond->step_list); - if(job_cond->user_list) - list_destroy(job_cond->user_list); + if(job_cond->state_list) + list_destroy(job_cond->state_list); + if(job_cond->userid_list) + list_destroy(job_cond->userid_list); xfree(job_cond); } } +extern void destroy_acct_qos_cond(void *object) +{ + acct_qos_cond_t *acct_qos = (acct_qos_cond_t *)object; + if(acct_qos) { + if(acct_qos->id_list) + list_destroy(acct_qos->id_list); + if(acct_qos->name_list) + list_destroy(acct_qos->name_list); + xfree(acct_qos); + } +} + +extern void destroy_acct_txn_cond(void *object) +{ + acct_txn_cond_t *acct_txn = (acct_txn_cond_t *)object; + if(acct_txn) { + if(acct_txn->action_list) + list_destroy(acct_txn->action_list); + if(acct_txn->actor_list) + list_destroy(acct_txn->actor_list); + if(acct_txn->id_list) + list_destroy(acct_txn->id_list); + xfree(acct_txn); + } +} + extern void destroy_acct_update_object(void *object) { 
acct_update_object_t *acct_update = @@ -524,6 +589,7 @@ extern void pack_acct_user_rec(void *in, Buf buffer) uint32_t count = 0; acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; + char *tmp_info = NULL; if(!object) { pack16(0, buffer); @@ -531,7 +597,7 @@ extern void pack_acct_user_rec(void *in, Buf buffer) pack32(0, buffer); packnull(buffer); packnull(buffer); - pack16(0, buffer); + pack32(0, buffer); pack32(0, buffer); return; } @@ -565,7 +631,20 @@ extern void pack_acct_user_rec(void *in, Buf buffer) packstr(object->default_acct, buffer); packstr(object->name, buffer); - pack16((uint16_t)object->qos, buffer); + + if(object->qos_list) + count = list_count(object->qos_list); + + pack32(count, buffer); + + if(count) { + itr = list_iterator_create(object->qos_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; pack32(object->uid, buffer); } @@ -577,6 +656,7 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; int i; + char *tmp_info = NULL; *object = object_ptr; safe_unpack16((uint16_t *)&object_ptr->admin_level, buffer); @@ -603,7 +683,14 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) } safe_unpackstr_xmalloc(&object_ptr->default_acct, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); + safe_unpack32(&count, buffer); + if(count) { + object_ptr->qos_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->qos_list, tmp_info); + } + } safe_unpack32(&object_ptr->uid, buffer); return SLURM_SUCCESS; @@ -614,11 +701,11 @@ unpack_error: return SLURM_ERROR; } -extern void pack_update_shares_used(void *object, Buf buffer) +extern void pack_update_shares_used(void *in, Buf buffer) { - shares_used_object_t 
*object_ptr = (shares_used_object_t *) object; - pack32(object_ptr->assoc_id, buffer); - pack32(object_ptr->shares_used, buffer); + shares_used_object_t *object = (shares_used_object_t *)in; + pack32(object->assoc_id, buffer); + pack32(object->shares_used, buffer); } extern int unpack_update_shares_used(void **object, Buf buffer) @@ -643,6 +730,7 @@ extern void pack_acct_account_rec(void *in, Buf buffer) uint32_t count = 0; acct_account_rec_t *object = (acct_account_rec_t *)in; acct_association_rec_t *assoc = NULL; + char *tmp_info = NULL; if(!object) { pack32(0, buffer); @@ -650,7 +738,7 @@ extern void pack_acct_account_rec(void *in, Buf buffer) packnull(buffer); packnull(buffer); packnull(buffer); - pack16(0, buffer); + pack32(0, buffer); return; } @@ -683,7 +771,20 @@ extern void pack_acct_account_rec(void *in, Buf buffer) packstr(object->description, buffer); packstr(object->name, buffer); packstr(object->organization, buffer); - pack16((uint16_t)object->qos, buffer); + + if(object->qos_list) + count = list_count(object->qos_list); + + pack32(count, buffer); + + if(count) { + itr = list_iterator_create(object->qos_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; } extern int unpack_acct_account_rec(void **object, Buf buffer) @@ -694,6 +795,7 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; acct_account_rec_t *object_ptr = xmalloc(sizeof(acct_account_rec_t)); + char *tmp_info = NULL; *object = object_ptr; @@ -721,7 +823,14 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) safe_unpackstr_xmalloc(&object_ptr->description, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->organization, &uint32_tmp, buffer); - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); + safe_unpack32(&count, buffer); + if(count) { + 
object_ptr->qos_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->qos_list, tmp_info); + } + } return SLURM_SUCCESS; @@ -1031,6 +1140,81 @@ unpack_error: return SLURM_ERROR; } +extern void pack_acct_qos_rec(void *in, Buf buffer) +{ + acct_qos_rec_t *object = (acct_qos_rec_t *)in; + if(!object) { + packnull(buffer); + pack32(0, buffer); + packnull(buffer); + return; + } + packstr(object->description, buffer); + pack32(object->id, buffer); + packstr(object->name, buffer); +} + +extern int unpack_acct_qos_rec(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + acct_qos_rec_t *object_ptr = xmalloc(sizeof(acct_qos_rec_t)); + + *object = object_ptr; + safe_unpackstr_xmalloc(&object_ptr->description, &uint32_tmp, buffer); + safe_unpack32(&object_ptr->id, buffer); + safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); + + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_qos_rec(object_ptr); + *object = NULL; + return SLURM_ERROR; +} + +extern void pack_acct_txn_rec(void *in, Buf buffer) +{ + acct_txn_rec_t *object = (acct_txn_rec_t *)in; + if(!object) { + pack16(0, buffer); + packnull(buffer); + pack32(0, buffer); + packnull(buffer); + pack_time(0, buffer); + packnull(buffer); + return; + } + pack16(object->action, buffer); + packstr(object->actor_name, buffer); + pack32(object->id, buffer); + packstr(object->set_info, buffer); + pack_time(object->timestamp, buffer); + packstr(object->where_query, buffer); +} + +extern int unpack_acct_txn_rec(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + acct_txn_rec_t *object_ptr = xmalloc(sizeof(acct_txn_rec_t)); + + *object = object_ptr; + + safe_unpack16(&object_ptr->action, buffer); + safe_unpackstr_xmalloc(&object_ptr->actor_name, &uint32_tmp, buffer); + safe_unpack32(&object_ptr->id, buffer); + safe_unpackstr_xmalloc(&object_ptr->set_info, &uint32_tmp, buffer); + safe_unpack_time(&object_ptr->timestamp, 
buffer); + safe_unpackstr_xmalloc(&object_ptr->where_query, &uint32_tmp, buffer); + + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_txn_rec(object_ptr); + *object = NULL; + return SLURM_ERROR; + +} + extern void pack_acct_user_cond(void *in, Buf buffer) { char *tmp_info = NULL; @@ -1042,7 +1226,6 @@ extern void pack_acct_user_cond(void *in, Buf buffer) pack16(0, buffer); pack_acct_association_cond(NULL, buffer); pack32(0, buffer); - pack16(0, buffer); pack32(0, buffer); pack16(0, buffer); pack16(0, buffer); @@ -1068,20 +1251,20 @@ extern void pack_acct_user_cond(void *in, Buf buffer) } count = 0; - pack16((uint16_t)object->qos, buffer); - - if(object->user_list) - count = list_count(object->user_list); + if(object->qos_list) + count = list_count(object->qos_list); pack32(count, buffer); if(count) { - itr = list_iterator_create(object->user_list); + itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } + count = 0; + pack16((uint16_t)object->with_assocs, buffer); pack16((uint16_t)object->with_coords, buffer); pack16((uint16_t)object->with_deleted, buffer); @@ -1112,13 +1295,12 @@ extern int unpack_acct_user_cond(void **object, Buf buffer) list_append(object_ptr->def_acct_list, tmp_info); } } - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); safe_unpack32(&count, buffer); if(count) { - object_ptr->user_list = list_create(slurm_destroy_char); + object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); - list_append(object_ptr->user_list, tmp_info); + list_append(object_ptr->qos_list, tmp_info); } } safe_unpack16((uint16_t *)&object_ptr->with_assocs, buffer); @@ -1141,29 +1323,15 @@ extern void pack_acct_account_cond(void *in, Buf buffer) uint32_t count = 0; if(!object) { - pack32(0, buffer); pack_acct_association_cond(NULL, buffer); pack32(0, buffer); pack32(0, buffer); - pack16(0, buffer); 
+ pack32(0, buffer); pack16(0, buffer); pack16(0, buffer); pack16(0, buffer); return; } - if(object->acct_list) - count = list_count(object->acct_list); - - pack32(count, buffer); - - if(count) { - itr = list_iterator_create(object->acct_list); - while((tmp_info = list_next(itr))) { - packstr(tmp_info, buffer); - } - list_iterator_destroy(itr); - } - pack_acct_association_cond(object->assoc_cond, buffer); count = 0; @@ -1192,8 +1360,23 @@ extern void pack_acct_account_cond(void *in, Buf buffer) packstr(tmp_info, buffer); } list_iterator_destroy(itr); + count = 0; + } + + if(object->qos_list) + count = list_count(object->qos_list); + + pack32(count, buffer); + + if(count) { + itr = list_iterator_create(object->qos_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + count = 0; } - pack16((uint16_t)object->qos, buffer); + pack16((uint16_t)object->with_assocs, buffer); pack16((uint16_t)object->with_coords, buffer); pack16((uint16_t)object->with_deleted, buffer); @@ -1208,15 +1391,6 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) char *tmp_info = NULL; *object = object_ptr; - safe_unpack32(&count, buffer); - if(count) { - object_ptr->acct_list = list_create(slurm_destroy_char); - for(i=0; i<count; i++) { - safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); - list_append(object_ptr->acct_list, tmp_info); - } - } - if(unpack_acct_association_cond((void **)&object_ptr->assoc_cond, buffer) == SLURM_ERROR) goto unpack_error; @@ -1237,7 +1411,14 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) list_append(object_ptr->organization_list, tmp_info); } } - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); + safe_unpack32(&count, buffer); + if(count) { + object_ptr->qos_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->qos_list, tmp_info); + } + } safe_unpack16((uint16_t 
*)&object_ptr->with_assocs, buffer); safe_unpack16((uint16_t *)&object_ptr->with_coords, buffer); safe_unpack16((uint16_t *)&object_ptr->with_deleted, buffer); @@ -1520,12 +1701,15 @@ extern void pack_acct_job_cond(void *in, Buf buffer) pack32(0, buffer); pack32(0, buffer); pack16(0, buffer); + pack16(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); + pack32(0, buffer); + pack16(0, buffer); return; } @@ -1567,7 +1751,7 @@ extern void pack_acct_job_cond(void *in, Buf buffer) } count = 0; - pack16(object->completion, buffer); + pack16(object->duplicates, buffer); if(object->groupid_list) count = list_count(object->groupid_list); @@ -1607,21 +1791,36 @@ extern void pack_acct_job_cond(void *in, Buf buffer) } count = 0; + if(object->state_list) + count = list_count(object->state_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->state_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + pack32(object->usage_end, buffer); pack32(object->usage_start, buffer); - if(object->user_list) - count = list_count(object->user_list); + if(object->userid_list) + count = list_count(object->userid_list); pack32(count, buffer); if(count) { - itr = list_iterator_create(object->user_list); + itr = list_iterator_create(object->userid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } count = 0; + + pack16(object->without_steps, buffer); } extern int unpack_acct_job_cond(void **object, Buf buffer) @@ -1661,7 +1860,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } } - safe_unpack16(&object_ptr->completion, buffer); + safe_unpack16(&object_ptr->duplicates, buffer); safe_unpack32(&count, buffer); if(count) { @@ -1692,18 +1891,29 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } } + safe_unpack32(&count, buffer); + if(count) { + 
object_ptr->state_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->state_list, tmp_info); + } + } + safe_unpack32(&object_ptr->usage_end, buffer); safe_unpack32(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); if(count) { - object_ptr->user_list = list_create(slurm_destroy_char); + object_ptr->userid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); - list_append(object_ptr->user_list, tmp_info); + list_append(object_ptr->userid_list, tmp_info); } } + safe_unpack16(&object_ptr->without_steps, buffer); + return SLURM_SUCCESS; unpack_error: @@ -1712,6 +1922,215 @@ unpack_error: return SLURM_ERROR; } +extern void pack_acct_qos_cond(void *in, Buf buffer) +{ + uint32_t count = 0; + char *tmp_info = NULL; + ListIterator itr = NULL; + acct_qos_cond_t *object = (acct_qos_cond_t *)in; + + if(!object) { + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + pack16(0, buffer); + return; + } + + if(object->description_list) + count = list_count(object->description_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->description_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->id_list) + count = list_count(object->id_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->id_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->name_list) + count = list_count(object->name_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->name_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + pack16(object->with_deleted, buffer); +} + +extern int 
unpack_acct_qos_cond(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + int i; + uint32_t count; + acct_qos_cond_t *object_ptr = xmalloc(sizeof(acct_qos_cond_t)); + char *tmp_info = NULL; + + *object = object_ptr; + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->description_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->description_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->id_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->id_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->name_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->name_list, tmp_info); + } + } + + safe_unpack16(&object_ptr->with_deleted, buffer); + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_qos_cond(object_ptr); + *object = NULL; + return SLURM_ERROR; +} + +extern void pack_acct_txn_cond(void *in, Buf buffer) +{ + uint32_t count = 0; + char *tmp_info = NULL; + ListIterator itr = NULL; + acct_txn_cond_t *object = (acct_txn_cond_t *)in; + + if(!object) { + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + return; + } + if(object->action_list) + count = list_count(object->action_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->action_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->actor_list) + count = list_count(object->actor_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->actor_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + 
list_iterator_destroy(itr); + } + count = 0; + + if(object->id_list) + count = list_count(object->id_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->id_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + pack32(object->time_end, buffer); + pack32(object->time_start, buffer); + +} + +extern int unpack_acct_txn_cond(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + int i; + uint32_t count; + acct_txn_cond_t *object_ptr = xmalloc(sizeof(acct_txn_cond_t)); + char *tmp_info = NULL; + + *object = object_ptr; + safe_unpack32(&count, buffer); + if(count) { + object_ptr->action_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->action_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->actor_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->actor_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->id_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->id_list, tmp_info); + } + } + + safe_unpack32(&object_ptr->time_end, buffer); + safe_unpack32(&object_ptr->time_start, buffer); + + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_txn_cond(object_ptr); + *object = NULL; + return SLURM_ERROR; +} + extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer) { uint32_t count = 0; @@ -1733,6 +2152,10 @@ extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer) case ACCT_REMOVE_ASSOC: my_function = pack_acct_association_rec; break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + my_function = pack_acct_qos_rec; + break; case ACCT_UPDATE_NOTSET: default: 
error("unknown type set in update_object: %d", object->type); @@ -1779,6 +2202,11 @@ extern int unpack_acct_update_object(acct_update_object_t **object, Buf buffer) my_function = unpack_acct_association_rec; my_destroy = destroy_acct_association_rec; break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + my_function = unpack_acct_qos_rec; + my_destroy = destroy_acct_qos_rec; + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in update_object: %d", @@ -1803,46 +2231,55 @@ unpack_error: return SLURM_ERROR; } -extern char *acct_qos_str(acct_qos_level_t level) +extern char *acct_qos_str(List qos_list, uint32_t level) { - switch(level) { - case ACCT_QOS_NOTSET: - return "Not Set"; - break; - case ACCT_QOS_NORMAL: - return "Normal"; - break; - case ACCT_QOS_EXPEDITE: - return "Expedite"; - break; - case ACCT_QOS_STANDBY: - return "Standby"; - break; - case ACCT_QOS_EXEMPT: - return "Exempt"; - break; - default: - return "Unknown"; - break; + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + + if(!qos_list) { + error("We need a qos list to translate"); + return NULL; + } else if(!level) { + debug2("no level"); + return "None"; } - return "Unknown"; + + itr = list_iterator_create(qos_list); + while((qos = list_next(itr))) { + if(level == qos->id) + break; + } + list_iterator_destroy(itr); + if(qos) + return qos->name; + else + return NULL; } -extern acct_qos_level_t str_2_acct_qos(char *level) +extern uint32_t str_2_acct_qos(List qos_list, char *level) { - if(!level) { - return ACCT_QOS_NOTSET; - } else if(!strncasecmp(level, "Normal", 1)) { - return ACCT_QOS_NORMAL; - } else if(!strncasecmp(level, "Expedite", 3)) { - return ACCT_QOS_EXPEDITE; - } else if(!strncasecmp(level, "Standby", 1)) { - return ACCT_QOS_STANDBY; - } else if(!strncasecmp(level, "Exempt", 3)) { - return ACCT_QOS_EXEMPT; - } else { - return ACCT_QOS_NOTSET; + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + + if(!qos_list) { + error("We need a qos list to translate"); + 
return NO_VAL; + } else if(!level) { + debug2("no level"); + return 0; + } + + + itr = list_iterator_create(qos_list); + while((qos = list_next(itr))) { + if(!strncasecmp(level, qos->name, strlen(level))) + break; } + list_iterator_destroy(itr); + if(qos) + return qos->id; + else + return NO_VAL; } extern char *acct_admin_level_str(acct_admin_level_t level) @@ -1892,12 +2329,12 @@ extern void log_assoc_rec(acct_association_rec_t *assoc_ptr) debug(" fairshare : NONE"); else debug(" fairshare : %u", - assoc_ptr->fairshare); + assoc_ptr->fairshare); if(assoc_ptr->max_cpu_secs_per_job == INFINITE) debug(" max_cpu_secs_per_job : NONE"); else debug(" max_cpu_secs_per_job : %d", - assoc_ptr->max_cpu_secs_per_job); + assoc_ptr->max_cpu_secs_per_job); if(assoc_ptr->max_jobs == INFINITE) debug(" max_jobs : NONE"); else @@ -1906,12 +2343,12 @@ extern void log_assoc_rec(acct_association_rec_t *assoc_ptr) debug(" max_nodes_per_job : NONE"); else debug(" max_nodes_per_job : %d", - assoc_ptr->max_nodes_per_job); + assoc_ptr->max_nodes_per_job); if(assoc_ptr->max_wall_duration_per_job == INFINITE) debug(" max_wall_duration_per_job : NONE"); else debug(" max_wall_duration_per_job : %d", - assoc_ptr->max_wall_duration_per_job); + assoc_ptr->max_wall_duration_per_job); debug(" parent_acct : %s", assoc_ptr->parent_acct); debug(" partition : %s", assoc_ptr->partition); debug(" user : %s(%u)", @@ -2005,12 +2442,12 @@ extern int acct_storage_g_add_users(void *db_conn, uint32_t uid, } extern int acct_storage_g_add_coord(void *db_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; return (*(g_acct_storage_context->ops.add_coord)) - (db_conn, uid, acct_list, user_q); + (db_conn, uid, acct_list, user_cond); } extern int acct_storage_g_add_accounts(void *db_conn, uint32_t uid, @@ -2040,125 +2477,159 @@ extern int acct_storage_g_add_associations(void *db_conn, uint32_t uid, 
(db_conn, uid, association_list); } +extern int acct_storage_g_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + if (slurm_acct_storage_init(NULL) < 0) + return SLURM_ERROR; + return (*(g_acct_storage_context->ops.add_qos)) + (db_conn, uid, qos_list); +} + extern List acct_storage_g_modify_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, - acct_user_rec_t *user) + acct_user_cond_t *user_cond, + acct_user_rec_t *user) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_users)) - (db_conn, uid, user_q, user); + (db_conn, uid, user_cond, user); } extern List acct_storage_g_modify_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, - acct_account_rec_t *acct) + acct_account_cond_t *acct_cond, + acct_account_rec_t *acct) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_accts)) - (db_conn, uid, acct_q, acct); + (db_conn, uid, acct_cond, acct); } extern List acct_storage_g_modify_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, - acct_cluster_rec_t *cluster) + acct_cluster_cond_t *cluster_cond, + acct_cluster_rec_t *cluster) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_clusters)) - (db_conn, uid, cluster_q, cluster); + (db_conn, uid, cluster_cond, cluster); } -extern List acct_storage_g_modify_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_g_modify_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_associations)) - (db_conn, uid, assoc_q, assoc); + (db_conn, uid, assoc_cond, assoc); } extern List acct_storage_g_remove_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { 
if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_users)) - (db_conn, uid, user_q); + (db_conn, uid, user_cond); } extern List acct_storage_g_remove_coord(void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_coord)) - (db_conn, uid, acct_list, user_q); + (db_conn, uid, acct_list, user_cond); } extern List acct_storage_g_remove_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_accts)) - (db_conn, uid, acct_q); + (db_conn, uid, acct_cond); } extern List acct_storage_g_remove_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_clusters)) - (db_conn, uid, cluster_q); + (db_conn, uid, cluster_cond); } -extern List acct_storage_g_remove_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_g_remove_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_associations)) - (db_conn, uid, assoc_q); + (db_conn, uid, assoc_cond); +} + +extern List acct_storage_g_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + if (slurm_acct_storage_init(NULL) < 0) + return NULL; + return (*(g_acct_storage_context->ops.remove_qos)) + (db_conn, uid, qos_cond); } extern List acct_storage_g_get_users(void *db_conn, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; - return (*(g_acct_storage_context->ops.get_users))(db_conn, user_q); + return 
(*(g_acct_storage_context->ops.get_users))(db_conn, user_cond); } extern List acct_storage_g_get_accounts(void *db_conn, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_accts)) - (db_conn, acct_q); + (db_conn, acct_cond); } extern List acct_storage_g_get_clusters(void *db_conn, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_clusters)) - (db_conn, cluster_q); + (db_conn, cluster_cond); } extern List acct_storage_g_get_associations(void *db_conn, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_associations)) - (db_conn, assoc_q); + (db_conn, assoc_cond); +} + +extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond) +{ + if (slurm_acct_storage_init(NULL) < 0) + return NULL; + return (*(g_acct_storage_context->ops.get_qos))(db_conn, qos_cond); +} + +extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond) +{ + if (slurm_acct_storage_init(NULL) < 0) + return NULL; + return (*(g_acct_storage_context->ops.get_txn))(db_conn, txn_cond); } extern int acct_storage_g_get_usage(void *db_conn, @@ -2273,7 +2744,8 @@ extern int jobacct_storage_g_step_complete (void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; - return (*(g_acct_storage_context->ops.step_complete))(db_conn, step_ptr); + return (*(g_acct_storage_context->ops.step_complete))(db_conn, + step_ptr); } /* diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index 43cac986a..f0b7bb3a7 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -53,14 +53,6 @@ typedef enum { ACCT_ADMIN_SUPER_USER } acct_admin_level_t; -typedef enum { 
- ACCT_QOS_NOTSET, - ACCT_QOS_NORMAL, - ACCT_QOS_EXPEDITE, - ACCT_QOS_STANDBY, - ACCT_QOS_EXEMPT -} acct_qos_level_t; - typedef enum { ACCT_UPDATE_NOTSET, ACCT_ADD_USER, @@ -70,7 +62,9 @@ typedef enum { ACCT_MODIFY_ASSOC, ACCT_REMOVE_USER, ACCT_REMOVE_ASSOC, - ACCT_REMOVE_COORD + ACCT_REMOVE_COORD, + ACCT_ADD_QOS, + ACCT_REMOVE_QOS } acct_update_type_t; /* Association conditions used for queries of the database */ @@ -100,11 +94,11 @@ typedef struct { } acct_association_cond_t; typedef struct { - List acct_list; /* list of char * */ - acct_association_cond_t *assoc_cond; + acct_association_cond_t *assoc_cond;/* use acct_list here for + names */ List description_list; /* list of char * */ List organization_list; /* list of char * */ - acct_qos_level_t qos; + List qos_list; /* list of char * */ uint16_t with_assocs; uint16_t with_coords; uint16_t with_deleted; @@ -116,7 +110,7 @@ typedef struct { char *description; char *name; char *organization; - acct_qos_level_t qos; + List qos_list /* list of char *'s */; } acct_account_rec_t; typedef struct { @@ -193,22 +187,36 @@ typedef struct { List acct_list; /* list of char * */ List associd_list; /* list of char */ List cluster_list; /* list of char * */ - uint16_t completion; /* get job completion records instead - * of accounting record */ + uint16_t duplicates; /* report duplicate job entries */ List groupid_list; /* list of char * */ List partition_list; /* list of char * */ List step_list; /* list of jobacct_selected_step_t */ + List state_list; /* list of char * */ uint32_t usage_end; uint32_t usage_start; - List user_list; /* list of char * */ + List userid_list; /* list of char * */ + uint16_t without_steps; /* don't give me step info */ } acct_job_cond_t; +typedef struct { + char *description; + uint32_t id; + char *name; +} acct_qos_rec_t; + +typedef struct { + List description_list; /* list of char * */ + List id_list; /* list of char * */ + List name_list; /* list of char * */ + uint16_t with_deleted; +} 
acct_qos_cond_t; + typedef struct { acct_admin_level_t admin_level; - acct_association_cond_t *assoc_cond; + acct_association_cond_t *assoc_cond; /* use user_list here for + names */ List def_acct_list; /* list of char * */ - acct_qos_level_t qos; - List user_list; /* list of char * */ + List qos_list; /* list of char * */ uint16_t with_assocs; uint16_t with_coords; uint16_t with_deleted; @@ -220,10 +228,27 @@ typedef struct { List coord_accts; /* list of acct_coord_rec_t *'s */ char *default_acct; char *name; - acct_qos_level_t qos; + List qos_list; /* list of char * */ uint32_t uid; } acct_user_rec_t; +typedef struct { + List action_list; /* list of char * */ + List actor_list; /* list of char * */ + List id_list; /* list of char * */ + uint32_t time_end; + uint32_t time_start; +} acct_txn_cond_t; + +typedef struct { + uint16_t action; + char *actor_name; + uint32_t id; + char *set_info; + time_t timestamp; + char *where_query; +} acct_txn_rec_t; + typedef struct { List objects; /* depending on type */ acct_update_type_t type; @@ -251,52 +276,64 @@ extern void destroy_cluster_accounting_rec(void *object); extern void destroy_acct_cluster_rec(void *object); extern void destroy_acct_accounting_rec(void *object); extern void destroy_acct_association_rec(void *object); +extern void destroy_acct_qos_rec(void *object); +extern void destroy_acct_txn_rec(void *object); extern void destroy_acct_user_cond(void *object); extern void destroy_acct_account_cond(void *object); extern void destroy_acct_cluster_cond(void *object); extern void destroy_acct_association_cond(void *object); extern void destroy_acct_job_cond(void *object); +extern void destroy_acct_qos_cond(void *object); +extern void destroy_acct_txn_cond(void *object); extern void destroy_acct_update_object(void *object); extern void destroy_update_shares_rec(void *object); /* pack functions */ -extern void pack_acct_user_rec(void *object, Buf buffer); +extern void pack_acct_user_rec(void *in, Buf buffer); extern 
int unpack_acct_user_rec(void **object, Buf buffer); -extern void pack_acct_account_rec(void *object, Buf buffer); +extern void pack_acct_account_rec(void *in, Buf buffer); extern int unpack_acct_account_rec(void **object, Buf buffer); -extern void pack_acct_coord_rec(void *object, Buf buffer); +extern void pack_acct_coord_rec(void *in, Buf buffer); extern int unpack_acct_coord_rec(void **object, Buf buffer); -extern void pack_cluster_accounting_rec(void *object, Buf buffer); +extern void pack_cluster_accounting_rec(void *in, Buf buffer); extern int unpack_cluster_accounting_rec(void **object, Buf buffer); -extern void pack_acct_cluster_rec(void *object, Buf buffer); +extern void pack_acct_cluster_rec(void *in, Buf buffer); extern int unpack_acct_cluster_rec(void **object, Buf buffer); -extern void pack_acct_accounting_rec(void *object, Buf buffer); +extern void pack_acct_accounting_rec(void *in, Buf buffer); extern int unpack_acct_accounting_rec(void **object, Buf buffer); -extern void pack_acct_association_rec(void *object, Buf buffer); +extern void pack_acct_association_rec(void *in, Buf buffer); extern int unpack_acct_association_rec(void **object, Buf buffer); +extern void pack_acct_qos_rec(void *in, Buf buffer); +extern int unpack_acct_qos_rec(void **object, Buf buffer); +extern void pack_acct_txn_rec(void *in, Buf buffer); +extern int unpack_acct_txn_rec(void **object, Buf buffer); -extern void pack_acct_user_cond(void *object, Buf buffer); +extern void pack_acct_user_cond(void *in, Buf buffer); extern int unpack_acct_user_cond(void **object, Buf buffer); -extern void pack_acct_account_cond(void *object, Buf buffer); +extern void pack_acct_account_cond(void *in, Buf buffer); extern int unpack_acct_account_cond(void **object, Buf buffer); -extern void pack_acct_cluster_cond(void *object, Buf buffer); +extern void pack_acct_cluster_cond(void *in, Buf buffer); extern int unpack_acct_cluster_cond(void **object, Buf buffer); -extern void 
pack_acct_association_cond(void *object, Buf buffer); +extern void pack_acct_association_cond(void *in, Buf buffer); extern int unpack_acct_association_cond(void **object, Buf buffer); -extern void pack_acct_job_cond(void *object, Buf buffer); +extern void pack_acct_job_cond(void *in, Buf buffer); extern int unpack_acct_job_cond(void **object, Buf buffer); +extern void pack_acct_qos_cond(void *in, Buf buffer); +extern int unpack_acct_qos_cond(void **object, Buf buffer); +extern void pack_acct_txn_cond(void *in, Buf buffer); +extern int unpack_acct_txn_cond(void **object, Buf buffer); extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer); extern int unpack_acct_update_object(acct_update_object_t **object, Buf buffer); -extern void pack_update_shares_used(void *object, Buf buffer); +extern void pack_update_shares_used(void *in, Buf buffer); extern int unpack_update_shares_used(void **object, Buf buffer); -extern char *acct_qos_str(acct_qos_level_t level); -extern acct_qos_level_t str_2_acct_qos(char *level); +extern char *acct_qos_str(List qos_list, uint32_t level); +extern uint32_t str_2_acct_qos(List qos_list, char *level); extern char *acct_admin_level_str(acct_admin_level_t level); extern acct_admin_level_t str_2_acct_admin_level(char *level); @@ -340,11 +377,12 @@ extern int acct_storage_g_add_users(void *db_conn, uint32_t uid, /* * add users as account coordinators * IN: acct_list list of char *'s of names of accounts - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * RET: SLURM_SUCCESS on success SLURM_ERROR else */ extern int acct_storage_g_add_coord(void *db_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q); + List acct_list, + acct_user_cond_t *user_cond); /* @@ -364,94 +402,111 @@ extern int acct_storage_g_add_clusters(void *db_conn, uint32_t uid, List cluster_list); /* - * add accts to accounting system + * add associations to accounting system * IN: association_list List of acct_association_rec_t 
* * RET: SLURM_SUCCESS on success SLURM_ERROR else */ extern int acct_storage_g_add_associations(void *db_conn, uint32_t uid, List association_list); +/* + * add qos's to accounting system + * IN: qos_list List of acct_qos_rec_t * + * RET: SLURM_SUCCESS on success SLURM_ERROR else + */ +extern int acct_storage_g_add_qos(void *db_conn, uint32_t uid, + List qos_list); + /* * modify existing users in the accounting system - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * IN: acct_user_rec_t *user * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_modify_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, + acct_user_cond_t *user_cond, acct_user_rec_t *user); /* * modify existing accounts in the accounting system - * IN: acct_acct_cond_t *acct_q + * IN: acct_account_cond_t *acct_cond * IN: acct_account_rec_t *acct * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_modify_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct); /* * modify existing clusters in the accounting system - * IN: acct_cluster_cond_t *cluster_q + * IN: acct_cluster_cond_t *cluster_cond * IN: acct_cluster_rec_t *cluster * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_modify_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, + acct_cluster_cond_t *cluster_cond, acct_cluster_rec_t *cluster); /* * modify existing associations in the accounting system - * IN: acct_association_cond_t *assoc_q + * IN: acct_association_cond_t *assoc_cond * IN: acct_association_rec_t *assoc * RET: List containing (char *'s) else NULL on error */ -extern List acct_storage_g_modify_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc); +extern List acct_storage_g_modify_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, +
acct_association_rec_t *assoc); /* * remove users from accounting system - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); /* * remove users from being a coordinator of an account * IN: acct_list list of char *'s of names of accounts - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_coord(void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); /* * remove accounts from accounting system - * IN: acct_account_cond_t *acct_q + * IN: acct_account_cond_t *acct_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); /* * remove clusters from accounting system - * IN: acct_cluster_cond_t *cluster_q + * IN: acct_cluster_cond_t *cluster_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q); + acct_cluster_cond_t *cluster_cond); /* * remove associations from accounting system - * IN: acct_association_cond_t *assoc_q + * IN: acct_association_cond_t *assoc_cond * RET: List containing (char *'s) else NULL on error */ -extern List acct_storage_g_remove_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q); +extern List acct_storage_g_remove_associations( + void *db_conn, uint32_t uid, acct_association_cond_t *assoc_cond); + +/* + * remove qos from accounting system + * IN: acct_qos_cond_t *qos_cond + * RET: List containing (char *'s) else NULL on error + */ +extern List acct_storage_g_remove_qos( + void *db_conn, uint32_t uid, acct_qos_cond_t
*qos_cond); /* * get info from the storage @@ -461,7 +516,7 @@ extern List acct_storage_g_remove_associations(void *db_conn, uint32_t uid, * note List needs to be freed when called */ extern List acct_storage_g_get_users(void *db_conn, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); /* * get info from the storage @@ -471,7 +526,7 @@ extern List acct_storage_g_get_users(void *db_conn, * note List needs to be freed when called */ extern List acct_storage_g_get_accounts(void *db_conn, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); /* * get info from the storage @@ -480,8 +535,8 @@ extern List acct_storage_g_get_accounts(void *db_conn, * returns List of acct_cluster_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_clusters(void *db_conn, - acct_cluster_cond_t *cluster_q); +extern List acct_storage_g_get_clusters( + void *db_conn, acct_cluster_cond_t *cluster_cond); /* * get info from the storage @@ -489,8 +544,25 @@ extern List acct_storage_g_get_clusters(void *db_conn, * RET: List of acct_association_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_associations(void *db_conn, - acct_association_cond_t *assoc_q); +extern List acct_storage_g_get_associations( + void *db_conn, acct_association_cond_t *assoc_cond); + + +/* + * get info from the storage + * IN: acct_qos_cond_t * + * RET: List of acct_qos_rec_t * + * note List needs to be freed when called + */ +extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond); + +/* + * get info from the storage + * IN: acct_txn_cond_t * + * RET: List of acct_txn_rec_t * + * note List needs to be freed when called + */ +extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond); /* * get info from the storage diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 4373f507e..adc3a68b6 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -1,6 +1,6 @@ 
/*****************************************************************************\ * src/common/slurm_cred.c - SLURM job credential functions - * $Id: slurm_cred.c 14208 2008-06-06 19:15:24Z da $ + * $Id: slurm_cred.c 14499 2008-07-11 22:54:48Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -721,6 +721,40 @@ slurm_cred_faker(slurm_cred_arg_t *arg) } +void slurm_cred_free_args(slurm_cred_arg_t *arg) +{ + xfree(arg->hostlist); + xfree(arg->alloc_lps); + arg->alloc_lps_cnt = 0; +} + +int +slurm_cred_get_args(slurm_cred_t cred, slurm_cred_arg_t *arg) +{ + xassert(cred != NULL); + xassert(arg != NULL); + + /* + * set arguments to cred contents + */ + slurm_mutex_lock(&cred->mutex); + arg->jobid = cred->jobid; + arg->stepid = cred->stepid; + arg->uid = cred->uid; + arg->job_mem = cred->job_mem; + arg->task_mem = cred->task_mem; + arg->hostlist = xstrdup(cred->nodes); + arg->alloc_lps_cnt = cred->alloc_lps_cnt; + if (arg->alloc_lps_cnt > 0) { + arg->alloc_lps = xmalloc(arg->alloc_lps_cnt * sizeof(uint32_t)); + memcpy(arg->alloc_lps, cred->alloc_lps, + arg->alloc_lps_cnt * sizeof(uint32_t)); + } else + arg->alloc_lps = NULL; + slurm_mutex_unlock(&cred->mutex); + + return SLURM_SUCCESS; +} int slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, @@ -775,13 +809,13 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, arg->job_mem = cred->job_mem; arg->task_mem = cred->task_mem; arg->hostlist = xstrdup(cred->nodes); - arg->alloc_lps_cnt = cred->alloc_lps_cnt; - arg->alloc_lps = NULL; - if (arg->alloc_lps_cnt > 0) { - arg->alloc_lps = xmalloc(arg->alloc_lps_cnt * sizeof(uint32_t)); - memcpy(arg->alloc_lps, cred->alloc_lps, - arg->alloc_lps_cnt * sizeof(uint32_t)); - } + arg->alloc_lps_cnt = cred->alloc_lps_cnt; + if (arg->alloc_lps_cnt > 0) { + arg->alloc_lps = xmalloc(arg->alloc_lps_cnt * 
sizeof(uint32_t)); + memcpy(arg->alloc_lps, cred->alloc_lps, + arg->alloc_lps_cnt * sizeof(uint32_t)); + } else + arg->alloc_lps = NULL; slurm_mutex_unlock(&cred->mutex); diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h index 9fb47d6ab..01ce80550 100644 --- a/src/common/slurm_cred.h +++ b/src/common/slurm_cred.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/slurm_cred.h - SLURM job credential operations - * $Id: slurm_cred.h 14148 2008-05-28 23:35:40Z jette $ + * $Id: slurm_cred.h 14499 2008-07-11 22:54:48Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -165,6 +165,13 @@ slurm_cred_t slurm_cred_copy(slurm_cred_t cred); */ slurm_cred_t slurm_cred_faker(slurm_cred_arg_t *arg); +/* Free the credential arguments as loaded by either + * slurm_cred_get_args() or slurm_cred_verify() */ +void slurm_cred_free_args(slurm_cred_arg_t *arg); + +/* Make a copy of the credential's arguments */ +int slurm_cred_get_args(slurm_cred_t cred, slurm_cred_arg_t *arg); + /* * Verify the signed credential `cred,' and return cred contents in * the cred_arg structure. The credential is cached and cannot be reused.
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index c513b3f8b..e7674c090 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -69,6 +69,7 @@ static void _slurm_free_partition_info_members (partition_info_t * part); static void _free_all_step_info (job_step_info_response_msg_t *msg); static void _slurm_free_job_step_info_members (job_step_info_t * msg); +static void _make_lower(char *change); /* * slurm_msg_t_init - initialize a slurm message @@ -110,6 +111,84 @@ extern void slurm_destroy_char(void *object) xfree(tmp); } +/* returns number of objects added to list */ +extern int slurm_addto_char_list(List char_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + //info("got %s %d", name, i-start); + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + _make_lower(name); + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. 
It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + _make_lower(name); + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} void slurm_free_last_update_msg(last_update_msg_t * msg) { @@ -637,6 +716,30 @@ void inline slurm_free_will_run_response_msg(will_run_response_msg_t *msg) } } +extern void +private_data_string(uint16_t private_data, char *str, int str_len) +{ + if (str_len > 0) + str[0] = '\0'; + if (str_len < 22) { + error("private_data_string: output buffer too small"); + return; + } + + if (private_data & PRIVATE_DATA_JOBS) + strcat(str, "jobs"); + if (private_data & PRIVATE_DATA_NODES) { + if (str[0]) + strcat(str, ","); + strcat(str, "nodes"); + } + if (private_data & PRIVATE_DATA_PARTITIONS) { + if (str[0]) + strcat(str, ","); + strcat(str, "partitions"); + } +} + char *job_state_string(enum job_states inx) { if (inx & JOB_COMPLETING) @@ -1254,6 +1357,8 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data) slurm_free_suspend_msg(data); break; case REQUEST_JOB_READY: + case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_INFO_SINGLE: slurm_free_job_id_msg(data); break; case REQUEST_NODE_SELECT_INFO: @@ -1371,3 +1476,18 @@ void inline slurm_free_job_notify_msg(job_notify_msg_t * msg) xfree(msg); } } + +/* make everything lowercase should not be called on static char *'s */ +static void _make_lower(char *change) +{ + if(change) { + int j = 0; + while(change[j]) { + char lower = tolower(change[j]); + if(lower != change[j]) + change[j] = lower; + j++; + } + } +} + diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 799f83ea8..d6ed3e7ed 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ 
-1,9 +1,8 @@ /****************************************************************************\ * slurm_protocol_defs.h - definitions used for RPCs - * - * $Id: slurm_protocol_defs.h 13755 2008-04-01 19:12:53Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Kevin Tew <tew1@llnl.gov>. * LLNL-CODE-402394. @@ -117,6 +116,7 @@ typedef enum { REQUEST_TRIGGER_GET, REQUEST_TRIGGER_CLEAR, RESPONSE_TRIGGER_GET, + REQUEST_JOB_INFO_SINGLE, REQUEST_UPDATE_JOB = 3001, REQUEST_UPDATE_NODE, @@ -746,6 +746,7 @@ extern void slurm_msg_t_init (slurm_msg_t *msg); extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src); extern void slurm_destroy_char(void *object); +extern int slurm_addto_char_list(List char_list, char *names); /* free message functions */ void inline slurm_free_checkpoint_tasks_msg(checkpoint_tasks_msg_t * msg); @@ -852,14 +853,17 @@ extern char *job_state_string(enum job_states inx); extern char *job_state_string_compact(enum job_states inx); extern char *node_state_string(enum node_states inx); extern char *node_state_string_compact(enum node_states inx); +extern void private_data_string(uint16_t private_data, char *str, int str_len); #define safe_read(fd, buf, size) do { \ int remaining = size; \ char *ptr = (char *) buf; \ int rc; \ while (remaining > 0) { \ - rc = read(fd, ptr, remaining); \ - if (rc == 0) { \ + rc = read(fd, ptr, remaining); \ + if ((rc == 0) && (remaining == size)) \ + goto rwfail; \ + else if (rc == 0) { \ debug("%s:%d: %s: safe_read (%d of %d) EOF", \ __FILE__, __LINE__, __CURRENT_FUNC__, \ remaining, (int)size); \ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 
a0abfea97..9be1aabca 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -691,6 +691,7 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) case REQUEST_JOB_READY: case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_INFO_SINGLE: _pack_job_ready_msg((job_id_msg_t *)msg->data, buffer); break; @@ -1040,6 +1041,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) case REQUEST_JOB_READY: case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_INFO_SINGLE: rc = _unpack_job_ready_msg((job_id_msg_t **) & msg->data, buffer); break; diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 3de583ca2..25818bcf5 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -342,6 +342,9 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) case DBD_GOT_CLUSTERS: case DBD_GOT_JOBS: case DBD_GOT_LIST: + case DBD_ADD_QOS: + case DBD_GOT_QOS: + case DBD_GOT_TXN: case DBD_GOT_USERS: case DBD_UPDATE_SHARES_USED: slurmdbd_pack_list_msg( @@ -361,10 +364,13 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) case DBD_GET_ASSOCS: case DBD_GET_CLUSTERS: case DBD_GET_JOBS_COND: + case DBD_GET_QOS: + case DBD_GET_TXN: case DBD_GET_USERS: case DBD_REMOVE_ACCOUNTS: case DBD_REMOVE_ASSOCS: case DBD_REMOVE_CLUSTERS: + case DBD_REMOVE_QOS: case DBD_REMOVE_USERS: slurmdbd_pack_cond_msg( req->msg_type, (dbd_cond_msg_t *)req->data, buffer); @@ -435,7 +441,9 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) req->data, buffer); break; default: - error("slurmdbd: Invalid message type %u", req->msg_type); + error("slurmdbd: Invalid message type pack %u(%s)", + req->msg_type, + slurmdbd_msg_type_2_str(req->msg_type)); free_buf(buffer); return NULL; } @@ -458,6 +466,9 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) case DBD_GOT_CLUSTERS: case DBD_GOT_JOBS: case DBD_GOT_LIST: + case DBD_ADD_QOS: + case DBD_GOT_QOS: + case DBD_GOT_TXN: case DBD_GOT_USERS: case DBD_UPDATE_SHARES_USED: rc = slurmdbd_unpack_list_msg( @@ -478,9 +489,12 @@ extern int 
unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) case DBD_GET_CLUSTERS: case DBD_GET_JOBS_COND: case DBD_GET_USERS: + case DBD_GET_QOS: + case DBD_GET_TXN: case DBD_REMOVE_ACCOUNTS: case DBD_REMOVE_ASSOCS: case DBD_REMOVE_CLUSTERS: + case DBD_REMOVE_QOS: case DBD_REMOVE_USERS: rc = slurmdbd_unpack_cond_msg( resp->msg_type, (dbd_cond_msg_t **)&resp->data, buffer); @@ -555,7 +569,9 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) (dbd_roll_usage_msg_t **)&resp->data, buffer); break; default: - error("slurmdbd: Invalid message type %u", resp->msg_type); + error("slurmdbd: Invalid message type unpack %u(%s)", + resp->msg_type, + slurmdbd_msg_type_2_str(resp->msg_type)); return SLURM_ERROR; } return rc; @@ -564,6 +580,283 @@ unpack_error: return SLURM_ERROR; } +extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type) +{ + if(!msg_type) { + return NO_VAL; + } else if(!strcasecmp(msg_type, "Init")) { + return DBD_INIT; + } else if(!strcasecmp(msg_type, "Fini")) { + return DBD_FINI; + } else if(!strcasecmp(msg_type, "Add Accounts")) { + return DBD_ADD_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Add Account Coord")) { + return DBD_ADD_ACCOUNT_COORDS; + } else if(!strcasecmp(msg_type, "Add Associations")) { + return DBD_ADD_ASSOCS; + } else if(!strcasecmp(msg_type, "Add Clusters")) { + return DBD_ADD_CLUSTERS; + } else if(!strcasecmp(msg_type, "Add Users")) { + return DBD_ADD_USERS; + } else if(!strcasecmp(msg_type, "Cluster Processors")) { + return DBD_CLUSTER_PROCS; + } else if(!strcasecmp(msg_type, "Flush Jobs")) { + return DBD_FLUSH_JOBS; + } else if(!strcasecmp(msg_type, "Get Accounts")) { + return DBD_GET_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Get Associations")) { + return DBD_GET_ASSOCS; + } else if(!strcasecmp(msg_type, "Get Association Usage")) { + return DBD_GET_ASSOC_USAGE; + } else if(!strcasecmp(msg_type, "Get Clusters")) { + return DBD_GET_CLUSTERS; + } else if(!strcasecmp(msg_type, "Get Cluster Usage")) { + return 
DBD_GET_CLUSTER_USAGE; + } else if(!strcasecmp(msg_type, "Get Jobs")) { + return DBD_GET_JOBS; + } else if(!strcasecmp(msg_type, "Get Users")) { + return DBD_GET_USERS; + } else if(!strcasecmp(msg_type, "Got Accounts")) { + return DBD_GOT_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Got Associations")) { + return DBD_GOT_ASSOCS; + } else if(!strcasecmp(msg_type, "Got Association Usage")) { + return DBD_GOT_ASSOC_USAGE; + } else if(!strcasecmp(msg_type, "Got Clusters")) { + return DBD_GOT_CLUSTERS; + } else if(!strcasecmp(msg_type, "Got Cluster Usage")) { + return DBD_GOT_CLUSTER_USAGE; + } else if(!strcasecmp(msg_type, "Got Jobs")) { + return DBD_GOT_JOBS; + } else if(!strcasecmp(msg_type, "Got List")) { + return DBD_GOT_LIST; + } else if(!strcasecmp(msg_type, "Got Users")) { + return DBD_GOT_USERS; + } else if(!strcasecmp(msg_type, "Job Complete")) { + return DBD_JOB_COMPLETE; + } else if(!strcasecmp(msg_type, "Job Start")) { + return DBD_JOB_START; + } else if(!strcasecmp(msg_type, "Job Start RC")) { + return DBD_JOB_START_RC; + } else if(!strcasecmp(msg_type, "Job Suspend")) { + return DBD_JOB_SUSPEND; + } else if(!strcasecmp(msg_type, "Modify Accounts")) { + return DBD_MODIFY_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Modify Associations")) { + return DBD_MODIFY_ASSOCS; + } else if(!strcasecmp(msg_type, "Modify Clusters")) { + return DBD_MODIFY_CLUSTERS; + } else if(!strcasecmp(msg_type, "Modify Users")) { + return DBD_MODIFY_USERS; + } else if(!strcasecmp(msg_type, "Node State")) { + return DBD_NODE_STATE; + } else if(!strcasecmp(msg_type, "RC")) { + return DBD_RC; + } else if(!strcasecmp(msg_type, "Register Cluster")) { + return DBD_REGISTER_CTLD; + } else if(!strcasecmp(msg_type, "Remove Accounts")) { + return DBD_REMOVE_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Remove Account Coords")) { + return DBD_REMOVE_ACCOUNT_COORDS; + } else if(!strcasecmp(msg_type, "Remove Associations")) { + return DBD_REMOVE_ASSOCS; + } else if(!strcasecmp(msg_type, "Remove 
Clusters")) { + return DBD_REMOVE_CLUSTERS; + } else if(!strcasecmp(msg_type, "Remove Users")) { + return DBD_REMOVE_USERS; + } else if(!strcasecmp(msg_type, "Roll Usage")) { + return DBD_ROLL_USAGE; + } else if(!strcasecmp(msg_type, "Step Complete")) { + return DBD_STEP_COMPLETE; + } else if(!strcasecmp(msg_type, "Step Start")) { + return DBD_STEP_START; + } else if(!strcasecmp(msg_type, "Update Shares Used")) { + return DBD_UPDATE_SHARES_USED; + } else if(!strcasecmp(msg_type, "Get Jobs Conditional")) { + return DBD_GET_JOBS_COND; + } else if(!strcasecmp(msg_type, "Get Transations")) { + return DBD_GET_TXN; + } else if(!strcasecmp(msg_type, "Got Transations")) { + return DBD_GOT_TXN; + } else if(!strcasecmp(msg_type, "Add QOS")) { + return DBD_ADD_QOS; + } else if(!strcasecmp(msg_type, "Get QOS")) { + return DBD_GET_QOS; + } else if(!strcasecmp(msg_type, "Got QOS")) { + return DBD_GOT_QOS; + } else if(!strcasecmp(msg_type, "Remove QOS")) { + return DBD_REMOVE_QOS; + } else { + return NO_VAL; + } + + return NO_VAL; +} + +extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type) +{ + switch(msg_type) { + case DBD_INIT: + return "Init"; + break; + case DBD_FINI: + return "Fini"; + break; + case DBD_ADD_ACCOUNTS: + return "Add Accounts"; + break; + case DBD_ADD_ACCOUNT_COORDS: + return "Add Account Coord"; + break; + case DBD_ADD_ASSOCS: + return "Add Associations"; + break; + case DBD_ADD_CLUSTERS: + return "Add Clusters"; + break; + case DBD_ADD_USERS: + return "Add Users"; + break; + case DBD_CLUSTER_PROCS: + return "Cluster Processors"; + break; + case DBD_FLUSH_JOBS: + return "Flush Jobs"; + break; + case DBD_GET_ACCOUNTS: + return "Get Accounts"; + break; + case DBD_GET_ASSOCS: + return "Get Associations"; + break; + case DBD_GET_ASSOC_USAGE: + return "Get Association Usage"; + break; + case DBD_GET_CLUSTERS: + return "Get Clusters"; + break; + case DBD_GET_CLUSTER_USAGE: + return "Get Cluster Usage"; + break; + case DBD_GET_JOBS: + return "Get Jobs"; 
+ break; + case DBD_GET_USERS: + return "Get Users"; + break; + case DBD_GOT_ACCOUNTS: + return "Got Accounts"; + break; + case DBD_GOT_ASSOCS: + return "Got Associations"; + break; + case DBD_GOT_ASSOC_USAGE: + return "Got Association Usage"; + break; + case DBD_GOT_CLUSTERS: + return "Got Clusters"; + break; + case DBD_GOT_CLUSTER_USAGE: + return "Got Cluster Usage"; + break; + case DBD_GOT_JOBS: + return "Got Jobs"; + break; + case DBD_GOT_LIST: + return "Got List"; + break; + case DBD_GOT_USERS: + return "Got Users"; + break; + case DBD_JOB_COMPLETE: + return "Job Complete"; + break; + case DBD_JOB_START: + return "Job Start"; + break; + case DBD_JOB_START_RC: + return "Job Start RC"; + break; + case DBD_JOB_SUSPEND: + return "Job Suspend"; + break; + case DBD_MODIFY_ACCOUNTS: + return "Modify Accounts"; + break; + case DBD_MODIFY_ASSOCS: + return "Modify Associations"; + break; + case DBD_MODIFY_CLUSTERS: + return "Modify Clusters"; + break; + case DBD_MODIFY_USERS: + return "Modify Users"; + break; + case DBD_NODE_STATE: + return "Node State"; + break; + case DBD_RC: + return "RC"; + break; + case DBD_REGISTER_CTLD: + return "Register Cluster"; + break; + case DBD_REMOVE_ACCOUNTS: + return "Remove Accounts"; + break; + case DBD_REMOVE_ACCOUNT_COORDS: + return "Remove Account Coords"; + break; + case DBD_REMOVE_ASSOCS: + return "Remove Associations"; + break; + case DBD_REMOVE_CLUSTERS: + return "Remove Clusters"; + break; + case DBD_REMOVE_USERS: + return "Remove Users"; + break; + case DBD_ROLL_USAGE: + return "Roll Usage"; + break; + case DBD_STEP_COMPLETE: + return "Step Complete"; + break; + case DBD_STEP_START: + return "Step Start"; + break; + case DBD_UPDATE_SHARES_USED: + return "Update Shares Used"; + break; + case DBD_GET_JOBS_COND: + return "Get Jobs Conditional"; + break; + case DBD_GET_TXN: + return "Get Transations"; + break; + case DBD_GOT_TXN: + return "Got Transations"; + break; + case DBD_ADD_QOS: + return "Add QOS"; + break; + case 
DBD_GET_QOS: + return "Get QOS"; + break; + case DBD_GOT_QOS: + return "Got QOS"; + break; + case DBD_REMOVE_QOS: + return "Remove QOS"; + break; + default: + return "Unknown"; + break; + } + + return "Unknown"; +} + static int _send_init_msg(void) { int rc; @@ -724,8 +1017,10 @@ static Buf _recv_msg(void) if (msg_read != sizeof(nw_size)) return NULL; msg_size = ntohl(nw_size); - if ((msg_size < 2) || (msg_size > 1000000)) { - error("slurmdbd: Invalid msg_size (%u)"); + /* We don't error check for an upper limit here + * since size could possibly be massive */ + if (msg_size < 2) { + error("slurmdbd: Invalid msg_size (%u)", msg_size); return NULL; } @@ -1248,6 +1543,13 @@ void inline slurmdbd_free_cond_msg(slurmdbd_msg_type_t type, case DBD_GET_JOBS_COND: my_destroy = destroy_acct_job_cond; break; + case DBD_GET_QOS: + case DBD_REMOVE_QOS: + my_destroy = destroy_acct_qos_cond; + break; + case DBD_GET_TXN: + my_destroy = destroy_acct_txn_cond; + break; case DBD_GET_USERS: case DBD_REMOVE_USERS: my_destroy = destroy_acct_user_cond; @@ -1537,10 +1839,17 @@ void inline slurmdbd_pack_cond_msg(slurmdbd_msg_type_t type, case DBD_GET_JOBS_COND: my_function = pack_acct_job_cond; break; + case DBD_GET_QOS: + case DBD_REMOVE_QOS: + my_function = pack_acct_qos_cond; + break; case DBD_GET_USERS: case DBD_REMOVE_USERS: my_function = pack_acct_user_cond; break; + case DBD_GET_TXN: + my_function = pack_acct_txn_cond; + break; default: fatal("Unknown pack type"); return; @@ -1571,10 +1880,17 @@ int inline slurmdbd_unpack_cond_msg(slurmdbd_msg_type_t type, case DBD_GET_JOBS_COND: my_function = unpack_acct_job_cond; break; + case DBD_GET_QOS: + case DBD_REMOVE_QOS: + my_function = unpack_acct_qos_cond; + break; case DBD_GET_USERS: case DBD_REMOVE_USERS: my_function = unpack_acct_user_cond; break; + case DBD_GET_TXN: + my_function = unpack_acct_txn_cond; + break; default: fatal("Unknown unpack type"); return SLURM_ERROR; @@ -1926,10 +2242,17 @@ void inline 
slurmdbd_pack_list_msg(slurmdbd_msg_type_t type, case DBD_GOT_LIST: my_function = _slurmdbd_packstr; break; + case DBD_ADD_QOS: + case DBD_GOT_QOS: + my_function = pack_acct_qos_rec; + break; case DBD_ADD_USERS: case DBD_GOT_USERS: my_function = pack_acct_user_rec; break; + case DBD_GOT_TXN: + my_function = pack_acct_txn_rec; + break; case DBD_UPDATE_SHARES_USED: my_function = pack_update_shares_used; break; @@ -1987,11 +2310,20 @@ int inline slurmdbd_unpack_list_msg(slurmdbd_msg_type_t type, my_function = _slurmdbd_unpackstr; my_destroy = slurm_destroy_char; break; + case DBD_ADD_QOS: + case DBD_GOT_QOS: + my_function = unpack_acct_qos_rec; + my_destroy = destroy_acct_qos_rec; + break; case DBD_ADD_USERS: case DBD_GOT_USERS: my_function = unpack_acct_user_rec; my_destroy = destroy_acct_user_rec; break; + case DBD_GOT_TXN: + my_function = unpack_acct_txn_rec; + my_destroy = destroy_acct_txn_rec; + break; case DBD_UPDATE_SHARES_USED: my_function = unpack_update_shares_used; my_destroy = destroy_update_shares_rec; diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index c75cd0842..8fa02c784 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -57,10 +57,38 @@ #include "src/common/list.h" #include "src/common/slurm_accounting_storage.h" -/* Increment SLURMDBD_VERSION if any of the RPCs change */ -#define SLURMDBD_VERSION 01 +/* + * SLURMDBD_VERSION is the version of the slurmdbd protocol currently + * being used (i.e. this code). Increment this value whenever an + * RPC is added. Do not modify an existing RPC, but create a new + * msg_type for the new format (add new entries to the end of + * slurmdbd_msg_type_t so numbering of existing msg_type values + * do not change). Comment the version number when a defunct + * msg_type stops being used. 
For example, rather than changing + * the format of the RPC for DBD_ADD_USERS, add a DBD_ADD_USERS_V2, + * stop using DBD_ADD_USERS and add comment of this sort "Last used + * in SLURMDBD_VERSION 05". The slurmdbd must continue to support + * old RPCs for some time (until all Slurm clusters in that grid + * get upgraded to use the new set of RPCs). At that time, slurmdbd + * can have support for the old RPCs removed. + * + * SLURMDBD_VERSION_MIN is the minimum protocol version which slurmdbd + * will accept. Messages being sent to the slurmdbd from commands + * or daemons using older versions of the protocol will be + * rejected. Increment this value and discard the code processing + * that msg_type only after all systems have been upgraded. Don't + * remove entries from slurmdbd_msg_type_t or the numbering scheme + * will break (the enum value of a msg_type would change). + * + * The slurmdbd should be at least as current as any Slurm cluster + * communicating with it (e.g. it will not accept messages with a + * version higher than SLURMDBD_VERSION). + */ +#define SLURMDBD_VERSION 02 +#define SLURMDBD_VERSION_MIN 02 /* SLURM DBD message types */ +/* ANY TIME YOU ADD TO THIS LIST UPDATE THE CONVERSION FUNCTIONS! 
*/ typedef enum { DBD_INIT = 1400, /* Connection initialization */ DBD_FINI, /* Connection finalization */ @@ -108,7 +136,13 @@ typedef enum { DBD_STEP_COMPLETE, /* Record step completion */ DBD_STEP_START, /* Record step starting */ DBD_UPDATE_SHARES_USED, /* Record current share usage */ - DBD_GET_JOBS_COND /* Get job information with a condition */ + DBD_GET_JOBS_COND, /* Get job information with a condition */ + DBD_GET_TXN, /* Get transaction information */ + DBD_GOT_TXN, /* Got transaction information */ + DBD_ADD_QOS, /* Add QOS information */ + DBD_GET_QOS, /* Get QOS information */ + DBD_GOT_QOS, /* Got QOS information */ + DBD_REMOVE_QOS /* Remove QOS information */ } slurmdbd_msg_type_t; /*****************************************************************************\ @@ -319,6 +353,10 @@ extern int slurm_send_slurmdbd_recv_rc_msg(slurmdbd_msg_t *req, int *rc); extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req); extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer); + +extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type); +extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type); + /*****************************************************************************\ * Free various SlurmDBD message structures \*****************************************************************************/ diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index 038748c6c..4e8e8ee97 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/stepd_api.c - slurmstepd message API - * $Id: stepd_api.c 14314 2008-06-23 20:57:56Z jette $ + * $Id: stepd_api.c 14503 2008-07-14 17:27:40Z jette $ ***************************************************************************** * Copyright (C) 2005-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -841,7 +841,7 @@ stepd_stat_jobacct(int fd, stat_jobacct_msg_t *sent, stat_jobacct_msg_t *resp) resp->num_tasks = tasks; return rc; rwfail: - error("an error occured %d", rc); + error("gathering job accounting: %d", rc); jobacct_gather_g_destroy(resp->jobacct); resp->jobacct = NULL; return rc; diff --git a/src/database/Makefile.am b/src/database/Makefile.am index c1538b051..31e2b7c0c 100644 --- a/src/database/Makefile.am +++ b/src/database/Makefile.am @@ -36,4 +36,3 @@ libslurm_pgsql_la_LDFLAGS = $(LIB_LDFLAGS) libslurm_mysql_la_CFLAGS = $(MYSQL_CFLAGS) libslurm_pgsql_la_CFLAGS = $(PGSQL_CFLAGS) - diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h index 69acb54c4..c8fcb76a1 100644 --- a/src/database/mysql_common.h +++ b/src/database/mysql_common.h @@ -56,10 +56,19 @@ #include "src/common/list.h" #include "src/common/xstring.h" -#ifdef HAVE_MYSQL +#ifndef HAVE_MYSQL +typedef void mysql_conn_t; +#else #include <mysql.h> #include <mysqld_error.h> +typedef struct { + MYSQL *db_conn; + bool rollback; + List update_list; + int conn; +} mysql_conn_t; + typedef struct { uint32_t port; char *host; diff --git a/src/database/pgsql_common.h b/src/database/pgsql_common.h index cc48e8c5f..2762d57b6 100644 --- a/src/database/pgsql_common.h +++ b/src/database/pgsql_common.h @@ -56,9 +56,18 @@ #include "src/slurmctld/slurmctld.h" #include "src/common/xstring.h" -#ifdef HAVE_PGSQL +#ifndef HAVE_PGSQL +typedef void pgsql_conn_t; +#else #include <libpq-fe.h> +typedef struct { + PGconn *db_conn; + bool rollback; + List update_list; + int conn; +} pgsql_conn_t; + typedef struct { uint32_t port; char *host; diff --git a/src/plugins/accounting_storage/filetxt/Makefile.am b/src/plugins/accounting_storage/filetxt/Makefile.am index 4ea567fb1..b05f7f62c 100644 --- a/src/plugins/accounting_storage/filetxt/Makefile.am +++ b/src/plugins/accounting_storage/filetxt/Makefile.am @@ -11,4 +11,3 @@ pkglib_LTLIBRARIES = accounting_storage_filetxt.la 
accounting_storage_filetxt_la_SOURCES = accounting_storage_filetxt.c \ filetxt_jobacct_process.c filetxt_jobacct_process.h accounting_storage_filetxt_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) - diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index 58b4cf5f0..22c52c033 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -180,6 +180,12 @@ extern int init ( void ) mode_t prot = 0600; struct stat statbuf; + if(slurmdbd_conf) { + fatal("The filetxt plugin should not " + "be run from the slurmdbd. " + "Please use a database plugin"); + } + if(first) { debug2("jobacct_init() called"); log_file = slurm_get_accounting_storage_loc(); @@ -274,6 +280,12 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, acct_user_cond_t *user_q, acct_user_rec_t *user) @@ -333,6 +345,12 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(void *db_conn, acct_user_cond_t *user_q) { @@ -357,6 +375,18 @@ extern List acct_storage_p_get_associations(void *db_conn, return NULL; } +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + return NULL; +} + extern int acct_storage_p_get_usage(void *db_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) @@ -800,10 +830,37 @@ extern int jobacct_storage_p_suspend(void *db_conn, 
extern List jobacct_storage_p_get_jobs(void *db_conn, List selected_steps, List selected_parts, - void *params) -{ - return filetxt_jobacct_process_get_jobs(selected_steps, selected_parts, - params); + sacct_parameters_t *params) +{ + List job_list = NULL; + acct_job_cond_t job_cond; + memset(&job_cond, 0, sizeof(acct_job_cond_t)); + + job_cond.acct_list = selected_steps; + job_cond.step_list = selected_steps; + job_cond.partition_list = selected_parts; + job_cond.cluster_list = params->opt_cluster_list; + + if (params->opt_uid >=0) { + char *temp = xstrdup_printf("%u", params->opt_uid); + job_cond.userid_list = list_create(slurm_destroy_char); /* list owns and frees temp */ + list_append(job_cond.userid_list, temp); + } + + if (params->opt_gid >=0) { + char *temp = xstrdup_printf("%u", params->opt_gid); + job_cond.groupid_list = list_create(slurm_destroy_char); /* list owns and frees temp */ + list_append(job_cond.groupid_list, temp); + } + + job_list = filetxt_jobacct_process_get_jobs(&job_cond); + + if(job_cond.userid_list) + list_destroy(job_cond.userid_list); + if(job_cond.groupid_list) + list_destroy(job_cond.groupid_list); + + return job_list; } /* @@ -814,27 +871,7 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, extern List jobacct_storage_p_get_jobs_cond(void *db_conn, acct_job_cond_t *job_cond) { - sacct_parameters_t params; - - memset(&params, 0, sizeof(sacct_parameters_t)); - params.opt_uid = -1; - - if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { - params.opt_cluster = list_pop(job_cond->cluster_list); - } - if(job_cond->user_list && list_count(job_cond->user_list)) { - char *user = list_pop(job_cond->user_list); - struct passwd *pw = NULL; - if ((pw=getpwnam(user))) - params.opt_uid = pw->pw_uid; - xfree(user); - } - - return filetxt_jobacct_process_get_jobs(job_cond->step_list, - job_cond->partition_list, - &params); - if(params.opt_cluster) - xfree(params.opt_cluster); + return filetxt_jobacct_process_get_jobs(job_cond); } /* diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c
b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index 3711582c9..db3ec5d2e 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -45,8 +45,7 @@ #include "src/common/xstring.h" #include "src/common/xmalloc.h" -#include "src/common/slurm_protocol_api.h" -#include "src/common/jobacct_common.h" +#include "filetxt_jobacct_process.h" #include "src/slurmctld/slurmctld.h" #include "src/slurmdbd/read_config.h" /* Map field names to positions */ @@ -275,12 +274,33 @@ static jobacct_step_rec_t *_create_jobacct_step_rec( } static jobacct_job_rec_t *_create_jobacct_job_rec( - filetxt_job_rec_t *filetxt_job) + filetxt_job_rec_t *filetxt_job, acct_job_cond_t *job_cond) { - jobacct_job_rec_t *jobacct_job = create_jobacct_job_rec(); + jobacct_job_rec_t *jobacct_job = NULL; ListIterator itr = NULL; filetxt_step_rec_t *filetxt_step = NULL; + if(!job_cond) + goto no_cond; + + if (job_cond->state_list + && list_count(job_cond->state_list)) { + char *object = NULL; + itr = list_iterator_create(job_cond->state_list); + while((object = list_next(itr))) { + if (atoi(object) == filetxt_job->status) { + list_iterator_destroy(itr); + goto foundstate; + } + } + list_iterator_destroy(itr); + return NULL; /* no match */ + } + +foundstate: + +no_cond: + jobacct_job = create_jobacct_job_rec(); jobacct_job->associd = 0; jobacct_job->account = xstrdup(filetxt_job->account); jobacct_job->blockid = xstrdup(filetxt_job->header.blockid); @@ -835,8 +855,7 @@ static void _process_start(List job_list, char *f[], int lc, } static void _process_step(List job_list, char *f[], int lc, - int show_full, int len, - sacct_parameters_t *params) + int show_full, int len) { filetxt_job_rec_t *job = NULL; @@ -844,7 +863,7 @@ static void _process_step(List job_list, char *f[], int lc, filetxt_step_rec_t *temp = NULL; _parse_line(f, (void **)&temp, len); - + job = _find_job_record(job_list, temp->header, 
JOB_STEP); if (temp->stepnum == -2) { @@ -854,11 +873,9 @@ static void _process_step(List job_list, char *f[], int lc, if (!job) { /* fake it for now */ job = _create_filetxt_job_rec(temp->header); job->jobname = xstrdup("(unknown)"); - if (params->opt_verbose > 1) - fprintf(stderr, - "Note: JOB_STEP record %u.%u preceded " - "JOB_START record at line %d\n", - temp->header.jobnum, temp->stepnum, lc); + debug2("Note: JOB_STEP record %u.%u preceded " + "JOB_START record at line %d\n", + temp->header.jobnum, temp->stepnum, lc); } job->show_full = show_full; @@ -945,8 +962,7 @@ static void _process_suspend(List job_list, char *f[], int lc, } static void _process_terminated(List job_list, char *f[], int lc, - int show_full, int len, - sacct_parameters_t *params) + int show_full, int len) { filetxt_job_rec_t *job = NULL; filetxt_job_rec_t *temp = NULL; @@ -956,20 +972,17 @@ static void _process_terminated(List job_list, char *f[], int lc, if (!job) { /* fake it for now */ job = _create_filetxt_job_rec(temp->header); job->jobname = xstrdup("(unknown)"); - if (params->opt_verbose > 1) - fprintf(stderr, "Note: JOB_TERMINATED record for job " - "%u preceded " - "other job records at line %d\n", - temp->header.jobnum, lc); + debug("Note: JOB_TERMINATED record for job " + "%u preceded " + "other job records at line %d\n", + temp->header.jobnum, lc); } else if (job->job_terminated_seen) { if (temp->status == JOB_NODE_FAIL) { /* multiple node failures - extra TERMINATED records */ - if (params->opt_verbose > 1) - fprintf(stderr, - "Note: Duplicate JOB_TERMINATED " - "record (nf) for job %u at " - "line %d\n", - temp->header.jobnum, lc); + debug("Note: Duplicate JOB_TERMINATED " + "record (nf) for job %u at " + "line %d\n", + temp->header.jobnum, lc); /* JOB_TERMINATED/NF records may be preceded * by a JOB_TERMINATED/CA record; NF is much * more interesting. 
@@ -999,32 +1012,40 @@ finished: _destroy_filetxt_job_rec(temp); } -extern List filetxt_jobacct_process_get_jobs(List selected_steps, - List selected_parts, - sacct_parameters_t *params) +extern List filetxt_jobacct_process_get_jobs(acct_job_cond_t *job_cond) { char line[BUFFER_SIZE]; char *f[MAX_RECORD_FIELDS+1]; /* End list with null entry and, possibly, more data than we expected */ - char *fptr; + char *fptr = NULL, *filein = NULL; int i; FILE *fd = NULL; int lc = 0; int rec_type = -1; + int job_id = 0, step_id = 0, uid = 0, gid = 0; filetxt_job_rec_t *filetxt_job = NULL; jobacct_selected_step_t *selected_step = NULL; - char *selected_part = NULL; - ListIterator itr = NULL; + char *object = NULL; + ListIterator itr = NULL, itr2 = NULL; int show_full = 0; + int fdump_flag = 0; List ret_job_list = list_create(destroy_jobacct_job_rec); List job_list = list_create(_destroy_filetxt_job_rec); - if(slurmdbd_conf) { - params->opt_filein = slurm_get_accounting_storage_loc(); + filein = slurm_get_accounting_storage_loc(); + + /* we grab the fdump only for the filetxt plug through the + FDUMP_FLAG on the job_cond->duplicates variable. We didn't + add this extra field to the structure since it only applies + to this plugin. 
+ */ + if(job_cond) { + fdump_flag = job_cond->duplicates & FDUMP_FLAG; + job_cond->duplicates &= (~FDUMP_FLAG); } - fd = _open_log_file(params->opt_filein); + fd = _open_log_file(filein); while (fgets(line, BUFFER_SIZE, fd)) { lc++; @@ -1048,20 +1069,62 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, } rec_type = atoi(f[F_RECTYPE]); - - if (list_count(selected_steps)) { - itr = list_iterator_create(selected_steps); + job_id = atoi(f[F_JOB]); + uid = atoi(f[F_UID]); + gid = atoi(f[F_GID]); + + if(rec_type == JOB_STEP) + step_id = atoi(f[F_JOBSTEP]); + else + step_id = NO_VAL; + + if(!job_cond) { + show_full = 1; + goto no_cond; + } + + if (job_cond->userid_list + && list_count(job_cond->userid_list)) { + itr = list_iterator_create(job_cond->userid_list); + while((object = list_next(itr))) { + if (atoi(object) == uid) { + list_iterator_destroy(itr); + goto founduid; + } + } + list_iterator_destroy(itr); + continue; /* no match */ + } + founduid: + + if (job_cond->groupid_list + && list_count(job_cond->groupid_list)) { + itr = list_iterator_create(job_cond->groupid_list); + while((object = list_next(itr))) { + if (atoi(object) == gid) { + list_iterator_destroy(itr); + goto foundgid; + } + } + list_iterator_destroy(itr); + continue; /* no match */ + } + foundgid: + + if (job_cond->step_list + && list_count(job_cond->step_list)) { + itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if (strcmp(selected_step->job, f[F_JOB])) + if (selected_step->jobid != job_id) continue; /* job matches; does the step? 
*/ - if(selected_step->step == NULL) { + if(selected_step->stepid == NO_VAL) { show_full = 1; list_iterator_destroy(itr); goto foundjob; } else if (rec_type != JOB_STEP - || !strcmp(f[F_JOBSTEP], - selected_step->step)) { + || selected_step->stepid + == step_id) { list_iterator_destroy(itr); goto foundjob; } @@ -1073,11 +1136,11 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, } foundjob: - if (list_count(selected_parts)) { - itr = list_iterator_create(selected_parts); - while((selected_part = list_next(itr))) - if (!strcasecmp(f[F_PARTITION], - selected_part)) { + if (job_cond->partition_list + && list_count(job_cond->partition_list)) { + itr = list_iterator_create(job_cond->partition_list); + while((object = list_next(itr))) + if (!strcasecmp(f[F_PARTITION], object)) { list_iterator_destroy(itr); goto foundp; } @@ -1085,12 +1148,13 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, continue; /* no match */ } foundp: - - if (params->opt_fdump) { + if (fdump_flag) { _do_fdump(f, lc); continue; } - + + no_cond: + /* Build suitable tables with all the data */ switch(rec_type) { case JOB_START: @@ -1105,8 +1169,7 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, printf("Bad data on a Step entry\n"); _show_rec(f); } else - _process_step(job_list, f, lc, show_full, i, - params); + _process_step(job_list, f, lc, show_full, i); break; case JOB_SUSPEND: if(i < F_JOB_REQUID) { @@ -1122,36 +1185,53 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, _show_rec(f); } else _process_terminated(job_list, f, lc, - show_full, i, params); + show_full, i); break; default: - if (params->opt_verbose > 1) - fprintf(stderr, - "Invalid record at line %d of " - "input file\n", - lc); - if (params->opt_verbose > 2) - _show_rec(f); + debug("Invalid record at line %d of input file", lc); + _show_rec(f); break; } } if (ferror(fd)) { - perror(params->opt_filein); + perror(filein); exit(1); } fclose(fd); itr = 
list_iterator_create(job_list); + if(job_cond && !job_cond->duplicates) /* job_cond may be NULL (see no_cond path) */ + itr2 = list_iterator_create(ret_job_list); while((filetxt_job = list_next(itr))) { - list_append(ret_job_list, _create_jobacct_job_rec(filetxt_job)); + jobacct_job_rec_t *jobacct_job = + _create_jobacct_job_rec(filetxt_job, job_cond); + if(jobacct_job) { + jobacct_job_rec_t *curr_job = NULL; + if(job_cond && !job_cond->duplicates) { + while((curr_job = list_next(itr2))) { + if (curr_job->jobid == + jobacct_job->jobid) { + list_delete_item(itr2); + break; + } + } + } + list_append(ret_job_list, jobacct_job); + + if(job_cond && !job_cond->duplicates) + list_iterator_reset(itr2); + } } + + if(job_cond && !job_cond->duplicates) + list_iterator_destroy(itr2); + list_iterator_destroy(itr); list_destroy(job_list); - if(slurmdbd_conf) { - xfree(params->opt_filein); - } + xfree(filein); + return ret_job_list; } diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h index a5ba22def..acd4a43b1 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h @@ -44,11 +44,10 @@ #define _HAVE_FILETXT_JOBACCT_PROCESS_H #include "src/common/jobacct_common.h" +#include "src/common/slurm_accounting_storage.h" #include "src/slurmdbd/read_config.h" -extern List filetxt_jobacct_process_get_jobs(List selected_steps, - List selected_parts, - sacct_parameters_t *params); +extern List filetxt_jobacct_process_get_jobs(acct_job_cond_t *job_cond); extern void filetxt_jobacct_process_archive(List selected_parts, sacct_parameters_t *params); diff --git a/src/plugins/accounting_storage/gold/accounting_storage_gold.c b/src/plugins/accounting_storage/gold/accounting_storage_gold.c index 3c5fca395..bd9629fd9 100644 --- a/src/plugins/accounting_storage/gold/accounting_storage_gold.c +++ b/src/plugins/accounting_storage/gold/accounting_storage_gold.c @@ -455,10 +455,18 @@ static List
_get_user_list_from_response(gold_response_t *gold_response) if(!strcmp(name_val->name, "Name")) { user_rec->name = xstrdup(name_val->value); - } else if(!strcmp(name_val->name, "Expedite")) { - user_rec->qos = - atoi(name_val->value)+1; - } else if(!strcmp(name_val->name, "DefaultProject")) { + } /* else if(!strcmp(name_val->name, "Expedite")) { */ +/* if(user_rec->qos_list) */ +/* continue; */ +/* user_rec->qos_list = */ +/* list_create(slurm_destroy_char); */ +/* /\*really needs to have 1 added here */ +/* but we shouldn't ever need to use */ +/* this. */ +/* *\/ */ +/* slurm_addto_char_list(user_rec->qos_list, */ +/* name_val->value); */ +/* } */else if(!strcmp(name_val->name, "DefaultProject")) { user_rec->default_acct = xstrdup(name_val->value); } else { @@ -491,10 +499,10 @@ static List _get_acct_list_from_response(gold_response_t *gold_response) itr2 = list_iterator_create(resp_entry->name_val); while((name_val = list_next(itr2))) { - if(!strcmp(name_val->name, "Expedite")) { - acct_rec->qos = - atoi(name_val->value)+1; - } else if(!strcmp(name_val->name, + /* if(!strcmp(name_val->name, "Expedite")) { */ +/* acct_rec->qos = */ +/* atoi(name_val->value)+1; */ +/* } else */ if(!strcmp(name_val->name, "Name")) { acct_rec->name = xstrdup(name_val->value); @@ -740,7 +748,7 @@ extern int acct_storage_p_add_users(void *db_conn, gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; acct_user_rec_t *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; itr = list_iterator_create(user_list); while((object = list_next(itr))) { @@ -762,12 +770,12 @@ extern int acct_storage_p_add_users(void *db_conn, gold_request_add_assignment(gold_request, "DefaultProject", object->default_acct); - if(object->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - object->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(object->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", 
*/ +/* object->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -809,7 +817,7 @@ extern int acct_storage_p_add_accts(void *db_conn, gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; acct_account_rec_t *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; itr = list_iterator_create(acct_list); while((object = list_next(itr))) { @@ -833,12 +841,12 @@ extern int acct_storage_p_add_accts(void *db_conn, object->description); gold_request_add_assignment(gold_request, "Organization", object->organization); - if(object->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - object->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(object->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* object->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -1109,6 +1117,12 @@ extern int acct_storage_p_validate_assoc_id(void *db_conn, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(void *db_conn, acct_user_cond_t *user_q, acct_user_rec_t *user) @@ -1118,7 +1132,7 @@ extern List acct_storage_p_modify_users(void *db_conn, gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; char *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; int set = 0; if(!user_q) { @@ -1141,9 +1155,10 @@ extern List acct_storage_p_modify_users(void *db_conn, return NULL; } - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && 
list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -1179,12 +1194,12 @@ extern List acct_storage_p_modify_users(void *db_conn, "DefaultProject", user->default_acct); - if(user->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - user->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(user->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* user->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -1259,9 +1274,10 @@ extern List acct_storage_p_modify_user_admin_level(void *db_conn, return NULL; } - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -1321,7 +1337,7 @@ extern List acct_storage_p_modify_accts(void *db_conn, // int rc = SLURM_SUCCESS; gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; int set = 0; char *object = NULL; @@ -1344,9 +1360,10 @@ extern List acct_storage_p_modify_accts(void *db_conn, return NULL; } - if(acct_q->acct_list && list_count(acct_q->acct_list)) { - itr = list_iterator_create(acct_q->acct_list); - if(list_count(acct_q->acct_list) > 1) + if(acct_q->assoc_cond->acct_list + && list_count(acct_q->assoc_cond->acct_list)) { + itr = list_iterator_create(acct_q->assoc_cond->acct_list); + if(list_count(acct_q->assoc_cond->acct_list) > 1) set = 2; else set = 0; @@ -1403,12 +1420,12 @@ extern List 
acct_storage_p_modify_accts(void *db_conn, "Organization", acct->organization); - if(acct->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - acct->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(acct->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* acct->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -1616,9 +1633,10 @@ extern List acct_storage_p_remove_users(void *db_conn, return NULL; } - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -1702,9 +1720,10 @@ extern List acct_storage_p_remove_accts(void *db_conn, return NULL; } - if(acct_q->acct_list && list_count(acct_q->acct_list)) { - itr = list_iterator_create(acct_q->acct_list); - if(list_count(acct_q->acct_list) > 1) + if(acct_q->assoc_cond->acct_list + && list_count(acct_q->assoc_cond->acct_list)) { + itr = list_iterator_create(acct_q->assoc_cond->acct_list); + if(list_count(acct_q->assoc_cond->acct_list) > 1) set = 2; else set = 0; @@ -2060,6 +2079,12 @@ extern List acct_storage_p_remove_associations(void *db_conn, return NULL; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(void *db_conn, acct_user_cond_t *user_q) { @@ -2069,7 +2094,7 @@ extern List acct_storage_p_get_users(void *db_conn, ListIterator itr = NULL; char *object = NULL; int set = 0; - char tmp_buff[50]; +// char tmp_buff[50]; gold_request = create_gold_request(GOLD_OBJECT_USER, 
GOLD_ACTION_QUERY); @@ -2080,9 +2105,10 @@ extern List acct_storage_p_get_users(void *db_conn, if(!user_q) goto empty; - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -2113,13 +2139,13 @@ extern List acct_storage_p_get_users(void *db_conn, list_iterator_destroy(itr); } - if(user_q->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - user_q->qos-1); - gold_request_add_condition(gold_request, "Expedite", - tmp_buff, - GOLD_OPERATOR_NONE, 0); - } +/* if(user_q->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* user_q->qos-1); */ +/* gold_request_add_condition(gold_request, "Expedite", */ +/* tmp_buff, */ +/* GOLD_OPERATOR_NONE, 0); */ +/* } */ empty: gold_request_add_condition(gold_request, "Active", @@ -2160,7 +2186,7 @@ extern List acct_storage_p_get_accts(void *db_conn, ListIterator itr = NULL; int set = 0; char *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; gold_request = create_gold_request(GOLD_OBJECT_PROJECT, @@ -2171,9 +2197,10 @@ extern List acct_storage_p_get_accts(void *db_conn, if(!acct_q) goto empty; - if(acct_q->acct_list && list_count(acct_q->acct_list)) { - itr = list_iterator_create(acct_q->acct_list); - if(list_count(acct_q->acct_list) > 1) + if(acct_q->assoc_cond->acct_list + && list_count(acct_q->assoc_cond->acct_list)) { + itr = list_iterator_create(acct_q->assoc_cond->acct_list); + if(list_count(acct_q->assoc_cond->acct_list) > 1) set = 2; else set = 0; @@ -2221,13 +2248,13 @@ extern List acct_storage_p_get_accts(void *db_conn, list_iterator_destroy(itr); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - acct_q->qos-1); - 
gold_request_add_condition(gold_request, "Expedite", - tmp_buff, - GOLD_OPERATOR_NONE, 0); - } +/* if(acct_q->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* acct_q->qos-1); */ +/* gold_request_add_condition(gold_request, "Expedite", */ +/* tmp_buff, */ +/* GOLD_OPERATOR_NONE, 0); */ +/* } */ empty: gold_request_add_condition(gold_request, "Active", "True", @@ -2434,6 +2461,18 @@ empty: return association_list; } +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + return NULL; +} + extern int acct_storage_p_get_usage(void *db_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) @@ -3149,20 +3188,20 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, /* "id %u", */ /* account_rec.id); */ - if(account_rec.cluster) { - if(params->opt_cluster && - strcmp(params->opt_cluster, - account_rec. - cluster)) { - destroy_jobacct_job_rec( - job); - job = NULL; - break; - } - job->cluster = - xstrdup(account_rec. - cluster); - } +/* if(account_rec.cluster) { */ +/* if(params->opt_cluster && */ +/* strcmp(params->opt_cluster, */ +/* account_rec. */ +/* cluster)) { */ +/* destroy_jobacct_job_rec( */ +/* job); */ +/* job = NULL; */ +/* break; */ +/* } */ +/* job->cluster = */ +/* xstrdup(account_rec. 
*/ +/* cluster); */ +/* } */ if(account_rec.user) { struct passwd *passwd_ptr = @@ -3215,9 +3254,9 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, job->state = atoi(name_val->value); } else if(!strcmp(name_val->name, "ExitCode")) { job->exitcode = atoi(name_val->value); - } else if(!strcmp(name_val->name, "QoS")) { - job->qos = atoi(name_val->value); - } + } /* else if(!strcmp(name_val->name, "QoS")) { */ +/* job->qos = atoi(name_val->value); */ +/* } */ } list_iterator_destroy(itr2); diff --git a/src/plugins/accounting_storage/mysql/Makefile.am b/src/plugins/accounting_storage/mysql/Makefile.am index a34ba8aa8..c7414d3eb 100644 --- a/src/plugins/accounting_storage/mysql/Makefile.am +++ b/src/plugins/accounting_storage/mysql/Makefile.am @@ -16,6 +16,7 @@ accounting_storage_mysql_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) accounting_storage_mysql_la_CFLAGS = $(MYSQL_CFLAGS) accounting_storage_mysql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_mysql.la $(MYSQL_LIBS) -accounting_storage_mysql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_mysql.la +force: +$(accounting_storage_mysql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/plugins/accounting_storage/mysql/Makefile.in b/src/plugins/accounting_storage/mysql/Makefile.in index 2a69a97b1..b3f1531ee 100644 --- a/src/plugins/accounting_storage/mysql/Makefile.in +++ b/src/plugins/accounting_storage/mysql/Makefile.in @@ -76,6 +76,9 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) am__DEPENDENCIES_1 = +accounting_storage_mysql_la_DEPENDENCIES = \ + $(top_builddir)/src/database/libslurm_mysql.la \ + $(am__DEPENDENCIES_1) am_accounting_storage_mysql_la_OBJECTS = \ accounting_storage_mysql_la-accounting_storage_mysql.lo \ accounting_storage_mysql_la-mysql_jobacct_process.lo \ @@ -285,9 +288,6 @@ accounting_storage_mysql_la_CFLAGS = $(MYSQL_CFLAGS) accounting_storage_mysql_la_LIBADD = \ 
$(top_builddir)/src/database/libslurm_mysql.la $(MYSQL_LIBS) -accounting_storage_mysql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_mysql.la - all: all-am .SUFFIXES: @@ -588,6 +588,10 @@ uninstall-am: uninstall-pkglibLTLIBRARIES mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + +force: +$(accounting_storage_mysql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 1230c413c..0e06f7ce4 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -106,6 +106,7 @@ char *cluster_month_table = "cluster_month_usage_table"; char *cluster_table = "cluster_table"; char *event_table = "cluster_event_table"; char *job_table = "job_table"; +char *qos_table = "qos_table"; char *step_table = "step_table"; char *txn_table = "txn_table"; char *user_table = "user_table"; @@ -118,8 +119,9 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, uint32_t uid, List association_list); -extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, - acct_association_cond_t *assoc_q); +extern List acct_storage_p_get_associations( + mysql_conn_t *mysql_conn, + acct_association_cond_t *assoc_cond); extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, acct_association_rec_t *acct_assoc, @@ -135,11 +137,11 @@ static int _check_connection(mysql_conn_t *mysql_conn) if(!mysql_conn) { error("We need a connection to run this"); return SLURM_ERROR; - } else if(!mysql_conn->acct_mysql_db - || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) { - if(mysql_get_db_connection(&mysql_conn->acct_mysql_db, + 
} else if(!mysql_conn->db_conn + || mysql_db_ping(mysql_conn->db_conn) != 0) { + if(mysql_get_db_connection(&mysql_conn->db_conn, mysql_db_name, mysql_db_info) - != SLURM_SUCCESS) { + != SLURM_SUCCESS) { error("unable to re-connect to mysql database"); return SLURM_ERROR; } @@ -191,6 +193,11 @@ static int _addto_update_list(List update_list, acct_update_type_t type, update_object->objects = list_create( destroy_acct_association_rec); break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + update_object->objects = list_create( + destroy_acct_qos_rec); + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in update_object: %d", type); @@ -215,9 +222,9 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, "where cluster='%s' && acct='%s' && user='';", assoc_table, cluster, parent); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -269,8 +276,8 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, xstrfmtcat(query, "update %s set parent_acct='%s' where id = %s;", assoc_table, parent, id); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -299,9 +306,9 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, "&& acct='%s' && user='' order by lft;", assoc_table, lft, rgt, new_parent); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ 
-311,7 +318,7 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, debug4("%s(%s) %s,%s is a child of %s", new_parent, row[0], row[1], row[2], id); rc = _move_account(mysql_conn, atoi(row[1]), atoi(row[2]), - cluster, row[0], old_parent); + cluster, row[0], old_parent); } mysql_free_result(result); @@ -392,13 +399,13 @@ static int _modify_common(mysql_conn_t *mysql_conn, "values (%d, %d, \"%s\", '%s', \"%s\");", txn_table, now, type, cond_char, user_name, vals); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); @@ -468,9 +475,9 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, /* "order by lft;", */ /* object, assoc_table, lft, rgt); */ xfree(object); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -557,7 +564,71 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, return SLURM_SUCCESS; } +/* this function is here to see if any of what we are trying to remove + * has jobs that are or were once running. So if we have jobs and the + * object is less than a day old we don't want to delete it only set + * the deleted flag. 
+ */ +static bool _check_jobs_before_remove(mysql_conn_t *mysql_conn, + char *assoc_char) +{ + char *query = NULL; + bool rc = 0; + MYSQL_RES *result = NULL; + + query = xstrdup_printf("select t0.associd from %s as t0, %s as t1, " + "%s as t2 where t1.lft between " + "t2.lft and t2.rgt && (%s)" + "and t0.associd=t1.id limit 1;", + job_table, assoc_table, assoc_table, + assoc_char); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return rc; + } + xfree(query); + + if(mysql_num_rows(result)) { + debug4("We have jobs for this combo"); + rc = true; + } + + mysql_free_result(result); + return rc; +} + +static bool _check_jobs_before_remove_assoc(mysql_conn_t *mysql_conn, + char *assoc_char) +{ + char *query = NULL; + bool rc = 0; + MYSQL_RES *result = NULL; + + query = xstrdup_printf("select t1.associd from %s as t1, " + "%s as t2 where (%s)" + "and t1.associd=t2.id limit 1;", + job_table, assoc_table, + assoc_char); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return rc; + } + xfree(query); + + if(mysql_num_rows(result)) { + debug4("We have jobs for this combo"); + rc = true; + } + mysql_free_result(result); + return rc; +} /* Every option in assoc_char should have a 't1.' infront of it. */ static int _remove_common(mysql_conn_t *mysql_conn, @@ -574,37 +645,108 @@ static int _remove_common(mysql_conn_t *mysql_conn, MYSQL_RES *result = NULL; MYSQL_ROW row; time_t day_old = now - DELETE_SEC_BACK; + bool has_jobs = false; + /* If we have jobs associated with this we do not want to + * really delete it for accounting purposes. This is for + * corner cases most of the time this won't matter. + */ + if(table == acct_coord_table + || table == qos_table) { + /* This doesn't apply for these tables since we are + * only looking for association type tables. 
+ */ + } else if(table != assoc_table) { + has_jobs = _check_jobs_before_remove(mysql_conn, assoc_char); + } else { + has_jobs = _check_jobs_before_remove_assoc(mysql_conn, + name_char); + } /* we want to remove completely all that is less than a day old */ - if(table != assoc_table) { + if(!has_jobs && table != assoc_table) { query = xstrdup_printf("delete from %s where creation_time>%d " "&& (%s);", table, day_old, name_char); } - xstrfmtcat(query, - "update %s set mod_time=%d, deleted=1 " - "where deleted=0 && (%s);", - table, now, name_char); + if(table != assoc_table) + xstrfmtcat(query, + "update %s set mod_time=%d, deleted=1 " + "where deleted=0 && (%s);", + table, now, name_char); + xstrfmtcat(query, "insert into %s (timestamp, action, name, actor) " "values (%d, %d, \"%s\", '%s');", txn_table, now, type, name_char, user_name); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; } - if(table == acct_coord_table) + if(table == qos_table) { + /* remove this qos from all the users/accts that have it + */ + xstrfmtcat(query, + "update %s set mod_time=%d, %s " + "where deleted=0;", + user_table, now, assoc_char); + xstrfmtcat(query, + "update %s set mod_time=%d, %s " + "where deleted=0;", + acct_table, now, assoc_char); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + if(mysql_conn->rollback) { + mysql_db_rollback(mysql_conn->db_conn); + } + list_flush(mysql_conn->update_list); + + return SLURM_ERROR; + } + /* now get what we changed and set the update */ + 
xstrfmtcat(query, + "select name, qos from %s where " + "mod_time=%d and deleted=0;", + user_table, now); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + if(mysql_conn->rollback) { + mysql_db_rollback(mysql_conn->db_conn); + } + list_flush(mysql_conn->update_list); + + return SLURM_ERROR; + } + + rc = 0; + while((row = mysql_fetch_row(result))) { + acct_user_rec_t *user_rec = + xmalloc(sizeof(acct_user_rec_t)); + user_rec->name = xstrdup(row[0]); + user_rec->qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(user_rec->qos_list, row[1]); + _addto_update_list(mysql_conn->update_list, + ACCT_MODIFY_USER, + user_rec); + } + mysql_free_result(result); + + return SLURM_SUCCESS; + } else if(table == acct_coord_table) return SLURM_SUCCESS; /* mark deleted=1 or remove completely the @@ -614,7 +756,7 @@ static int _remove_common(mysql_conn_t *mysql_conn, if(!assoc_char) { error("no assoc_char"); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; @@ -627,16 +769,18 @@ static int _remove_common(mysql_conn_t *mysql_conn, /* assoc_table, assoc_char); */ query = xstrdup_printf("select distinct t1.id " "from %s as t1, %s as t2 " - "where %s && t1.lft between " - "t2.lft and t2.rgt;", + "where (%s) && t1.lft between " + "t2.lft and t2.rgt && t1.deleted=0 " + " && t2.deleted=0;", assoc_table, assoc_table, assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; @@ -665,20 +809,20 @@ static int _remove_common(mysql_conn_t 
*mysql_conn, } else loc_assoc_char = assoc_char; -/* query = xstrdup_printf( */ -/* "delete t2 from %s as t2, %s as t1 where t1.creation_time>%d && (%s);" */ -/* "delete t2 from %s as t2, %s as t1 where t1.creation_time>%d && (%s);" */ -/* "delete t2 from %s as t2, %s as t1 where t1.creation_time>%d && (%s);", */ -/* assoc_day_table, assoc_table, day_old, loc_assoc_char, */ -/* assoc_hour_table, assoc_table, day_old, loc_assoc_char, */ -/* assoc_month_table, assoc_table, day_old, loc_assoc_char); */ - query = xstrdup_printf( - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);", - assoc_day_table, day_old, loc_assoc_char, - assoc_hour_table, day_old, loc_assoc_char, - assoc_month_table, day_old, loc_assoc_char); + if(!loc_assoc_char) { + debug2("No associations with object being deleted\n"); + return rc; + } + + if(!has_jobs) + query = xstrdup_printf( + "delete from %s where creation_time>%d && (%s);" + "delete from %s where creation_time>%d && (%s);" + "delete from %s where creation_time>%d && (%s);", + assoc_day_table, day_old, loc_assoc_char, + assoc_hour_table, day_old, loc_assoc_char, + assoc_month_table, day_old, loc_assoc_char); + xstrfmtcat(query, "update %s set mod_time=%d, deleted=1 where (%s);" "update %s set mod_time=%d, deleted=1 where (%s);" @@ -687,17 +831,25 @@ static int _remove_common(mysql_conn_t *mysql_conn, assoc_hour_table, now, loc_assoc_char, assoc_month_table, now, loc_assoc_char); - debug3("%d query\n%s %d", mysql_conn->conn, query, strlen(query)); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s %d", + mysql_conn->conn, __LINE__, query, strlen(query)); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return 
SLURM_ERROR; } + /* If we have jobs that have ran don't go through the logic of + * removing the associations. Since we may want them for + * reports in the future since jobs had ran. + */ + if(has_jobs) + goto just_update; + /* remove completely all the associations for this added in the last * day, since they are most likely nothing we really wanted in * the first place. @@ -706,31 +858,34 @@ static int _remove_common(mysql_conn_t *mysql_conn, "creation_time>%d && (%s);", assoc_table, day_old, loc_assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; } xfree(query); - /* we have to do this one at a time since the lft's and rgt's - change */ while((row = mysql_fetch_row(result))) { MYSQL_RES *result2 = NULL; MYSQL_ROW row2; + /* we have to do this one at a time since the lft's and rgt's + change. If you think you need to remove this make + sure your new way can handle changing lft and rgt's + in the association. 
*/ xstrfmtcat(query, "SELECT lft, rgt, (rgt - lft + 1) " "FROM %s WHERE id = %s;", assoc_table, row[0]); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); rc = SLURM_ERROR; break; @@ -746,6 +901,7 @@ static int _remove_common(mysql_conn_t *mysql_conn, "%s AND %s;", assoc_table, row2[0], row2[1]); + xstrfmtcat(query, "UPDATE %s SET rgt = rgt - %s WHERE " "rgt > %s;" @@ -753,12 +909,14 @@ static int _remove_common(mysql_conn_t *mysql_conn, "lft > %s;", assoc_table, row2[2], row2[1], - assoc_table, row2[2], row2[1]); - + assoc_table, row2[2], + row2[1]); + mysql_free_result(result2); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("couldn't remove assoc"); @@ -768,27 +926,36 @@ static int _remove_common(mysql_conn_t *mysql_conn, mysql_free_result(result); if(rc == SLURM_ERROR) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return rc; } - - if(table == assoc_table) - return SLURM_SUCCESS; - - /* now update the associations themselves that are still around */ - query = xstrdup_printf("update %s as t1 set mod_time=%d, deleted=1 " - "where deleted=0 && (%s);", + +just_update: + /* now update the associations themselves that are still + * around clearing all the limits since if we add them back + * we don't want any residue from past associations lingering + * around. 
+ */ + query = xstrdup_printf("update %s as t1 set mod_time=%d, deleted=1, " + "fairshare=1, max_jobs=NULL, " + "max_nodes_per_job=NULL, " + "max_wall_duration_per_job=NULL, " + "max_cpu_secs_per_job=NULL " + "where (%s);", assoc_table, now, loc_assoc_char); - xfree(loc_assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + + if(table != assoc_table) + xfree(loc_assoc_char); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); } @@ -817,7 +984,7 @@ static int _get_account_coords(mysql_conn_t *mysql_conn, acct_coord_table, acct->name); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -838,7 +1005,7 @@ static int _get_account_coords(mysql_conn_t *mysql_conn, acct_coord_table, assoc_table, assoc_table, acct->name, acct->name); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -873,7 +1040,7 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) acct_coord_table, user->name); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -904,7 +1071,7 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) if(query) { if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -932,7 +1099,7 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, 
acct_user_rec_t *user) return SLURM_SUCCESS; } -static int _get_db_index(MYSQL *acct_mysql_db, +static int _get_db_index(MYSQL *db_conn, time_t submit, uint32_t jobid, uint32_t associd) { MYSQL_RES *result = NULL; @@ -942,7 +1109,7 @@ static int _get_db_index(MYSQL *acct_mysql_db, "submit=%d and jobid=%u and associd=%u", job_table, (int)submit, jobid, associd); - if(!(result = mysql_db_query_ret(acct_mysql_db, query, 0))) { + if(!(result = mysql_db_query_ret(db_conn, query, 0))) { xfree(query); return -1; } @@ -974,7 +1141,7 @@ static mysql_db_info_t *_mysql_acct_create_db_info() return db_info; } -static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) +static int _mysql_acct_check_tables(MYSQL *db_conn) { int rc = SLURM_SUCCESS; storage_field_t acct_coord_table_fields[] = { @@ -993,7 +1160,7 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { "name", "tinytext not null" }, { "description", "text not null" }, { "organization", "text not null" }, - { "qos", "smallint default 1 not null" }, + { "qos", "blob" }, { NULL, NULL} }; @@ -1096,6 +1263,16 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { NULL, NULL} }; + storage_field_t qos_table_fields[] = { + { "creation_time", "int unsigned not null" }, + { "mod_time", "int unsigned default 0 not null" }, + { "deleted", "tinyint default 0" }, + { "id", "int not null auto_increment" }, + { "name", "tinytext not null" }, + { "description", "text" }, + { NULL, NULL} + }; + storage_field_t step_table_fields[] = { { "id", "int not null" }, { "stepid", "smallint not null" }, @@ -1155,7 +1332,7 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { "deleted", "tinyint default 0" }, { "name", "tinytext not null" }, { "default_acct", "tinytext not null" }, - { "qos", "smallint default 1 not null" }, + { "qos", "blob" }, { "admin_level", "smallint default 1 not null" }, { NULL, NULL} }; @@ -1204,102 +1381,120 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) "&& @mcpj != -1) || @my_acct 
= '' END REPEAT; " "END;"; - if(mysql_db_create_table(acct_mysql_db, acct_coord_table, + if(mysql_db_create_table(db_conn, acct_coord_table, acct_coord_table_fields, ", primary key (acct(20), user(20)))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, acct_table, acct_table_fields, + if(mysql_db_create_table(db_conn, acct_table, acct_table_fields, ", primary key (name(20)))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_day_table, + if(mysql_db_create_table(db_conn, assoc_day_table, assoc_usage_table_fields, ", primary key (id, period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_hour_table, + if(mysql_db_create_table(db_conn, assoc_hour_table, assoc_usage_table_fields, ", primary key (id, period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_month_table, + if(mysql_db_create_table(db_conn, assoc_month_table, assoc_usage_table_fields, ", primary key (id, period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_table, assoc_table_fields, + if(mysql_db_create_table(db_conn, assoc_table, assoc_table_fields, ", primary key (id), " " unique index (user(20), acct(20), " "cluster(20), partition(20)))" /* " unique index (lft), " */ -/* " unique index (rgt))" */) + /* " unique index (rgt))" */) == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_day_table, + if(mysql_db_create_table(db_conn, cluster_day_table, cluster_usage_table_fields, ", primary key (cluster(20), period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_hour_table, + if(mysql_db_create_table(db_conn, cluster_hour_table, cluster_usage_table_fields, ", primary key (cluster(20), period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_month_table, + 
if(mysql_db_create_table(db_conn, cluster_month_table, cluster_usage_table_fields, ", primary key (cluster(20), period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_table, + if(mysql_db_create_table(db_conn, cluster_table, cluster_table_fields, ", primary key (name(20)))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, event_table, + if(mysql_db_create_table(db_conn, event_table, event_table_fields, ", primary key (node_name(20), cluster(20), " "period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, job_table, job_table_fields, + if(mysql_db_create_table(db_conn, job_table, job_table_fields, ", primary key (id), " "unique index (jobid, associd, submit))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, last_ran_table, + if(mysql_db_create_table(db_conn, last_ran_table, last_ran_table_fields, ")") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, step_table, + if(mysql_db_create_table(db_conn, qos_table, + qos_table_fields, + ", primary key (id), " + "unique index (name(20)))") + == SLURM_ERROR) + return SLURM_ERROR; + else { + time_t now = time(NULL); + char *query = xstrdup_printf( + "insert into %s " + "(creation_time, mod_time, name, description) " + "values (%d, %d, 'normal', 'Normal QOS default') " + "on duplicate key update deleted=0;", + qos_table, now, now); + debug3("%s", query); + mysql_db_query(db_conn, query); + xfree(query); + } + if(mysql_db_create_table(db_conn, step_table, step_table_fields, ", primary key (id, stepid))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, suspend_table, + if(mysql_db_create_table(db_conn, suspend_table, suspend_table_fields, ")") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, txn_table, txn_table_fields, + if(mysql_db_create_table(db_conn, txn_table, txn_table_fields, ", primary key 
(id))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, user_table, user_table_fields, + if(mysql_db_create_table(db_conn, user_table, user_table_fields, ", primary key (name(20)))") == SLURM_ERROR) return SLURM_ERROR; - rc = mysql_db_query(acct_mysql_db, get_parent_proc); + rc = mysql_db_query(db_conn, get_parent_proc); return rc; } @@ -1314,7 +1509,7 @@ extern int init ( void ) static int first = 1; int rc = SLURM_SUCCESS; #ifdef HAVE_MYSQL - MYSQL *acct_mysql_db = NULL; + MYSQL *db_conn = NULL; char *location = NULL; #else fatal("No MySQL database was found on the machine. " @@ -1355,11 +1550,11 @@ extern int init ( void ) debug2("mysql_connect() called for db %s", mysql_db_name); - mysql_get_db_connection(&acct_mysql_db, mysql_db_name, mysql_db_info); + mysql_get_db_connection(&db_conn, mysql_db_name, mysql_db_info); - rc = _mysql_acct_check_tables(acct_mysql_db); + rc = _mysql_acct_check_tables(db_conn); - mysql_close_db_connection(&acct_mysql_db); + mysql_close_db_connection(&db_conn); #endif @@ -1393,11 +1588,11 @@ extern void *acct_storage_p_get_connection(bool make_agent, bool rollback) debug2("acct_storage_p_get_connection: request new connection"); - mysql_get_db_connection(&mysql_conn->acct_mysql_db, + mysql_get_db_connection(&mysql_conn->db_conn, mysql_db_name, mysql_db_info); mysql_conn->rollback = rollback; if(rollback) { - mysql_autocommit(mysql_conn->acct_mysql_db, 0); + mysql_autocommit(mysql_conn->db_conn, 0); } mysql_conn->conn = conn++; mysql_conn->update_list = list_create(destroy_acct_update_object); @@ -1415,7 +1610,7 @@ extern int acct_storage_p_close_connection(mysql_conn_t **mysql_conn) return SLURM_SUCCESS; acct_storage_p_commit((*mysql_conn), 0); - mysql_close_db_connection(&(*mysql_conn)->acct_mysql_db); + mysql_close_db_connection(&(*mysql_conn)->db_conn); list_destroy((*mysql_conn)->update_list); xfree((*mysql_conn)); @@ -1435,10 +1630,10 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool 
commit) if(mysql_conn->rollback) { if(!commit) { - if(mysql_db_rollback(mysql_conn->acct_mysql_db)) + if(mysql_db_rollback(mysql_conn->db_conn)) error("rollback failed"); } else { - if(mysql_db_commit(mysql_conn->acct_mysql_db)) + if(mysql_db_commit(mysql_conn->db_conn)) error("commit failed"); } } @@ -1464,7 +1659,7 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) "where deleted=0 && control_port != 0", cluster_table); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); goto skip; } @@ -1526,6 +1721,10 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) case ACCT_REMOVE_ASSOC: rc = assoc_mgr_update_local_assocs(object); break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + rc = assoc_mgr_update_local_qos(object); + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in " @@ -1579,11 +1778,30 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(vals, "%d, %d, '%s', '%s'", now, now, object->name, object->default_acct); xstrfmtcat(extra, ", default_acct='%s'", object->default_acct); - if(object->qos != ACCT_QOS_NOTSET) { + if(object->qos_list && list_count(object->qos_list)) { + char *qos_val = NULL; + char *tmp_char = NULL; + ListIterator qos_itr = + list_iterator_create(object->qos_list); xstrcat(cols, ", qos"); - xstrfmtcat(vals, ", %u", object->qos); - xstrfmtcat(extra, ", qos=%u", object->qos); + while((tmp_char = list_next(qos_itr))) { + xstrfmtcat(qos_val, ",%s", tmp_char); + } + + xstrfmtcat(vals, ", '%s'", qos_val); + xstrfmtcat(extra, ", qos='%s'", qos_val); } + /* Since I don't really want to go find out which id + * normal is we are not going to add it at all which + * isn't a big deal since if the list is blank the user + * will get it be default + */ + /* else { */ +/* /\* Add normal qos to the user *\/ */ +/* xstrcat(cols, ", qos"); */ +/* xstrfmtcat(vals, ", ',0'"); */ +/* 
xstrfmtcat(extra, ", qos=',0'"); */ +/* } */ if(object->admin_level != ACCT_ADMIN_NOTSET) { xstrcat(cols, ", admin_level"); @@ -1598,7 +1816,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, xfree(cols); xfree(vals); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add user %s", object->name); @@ -1606,7 +1824,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, continue; } - affect_rows = _last_affected_rows(mysql_conn->acct_mysql_db); + affect_rows = _last_affected_rows(mysql_conn->db_conn); if(!affect_rows) { debug("nothing changed"); xfree(extra); @@ -1643,7 +1861,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); - rc = mysql_db_query(mysql_conn->acct_mysql_db, + rc = mysql_db_query(mysql_conn->db_conn, txn_query); xfree(txn_query); if(rc != SLURM_SUCCESS) { @@ -1670,7 +1888,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, } extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL char *query = NULL, *user = NULL, *acct = NULL; @@ -1681,7 +1899,9 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, int rc = SLURM_SUCCESS; acct_user_rec_t *user_rec = NULL; - if(!user_q || !user_q->user_list || !list_count(user_q->user_list) + if(!user_cond || !user_cond->assoc_cond + || !user_cond->assoc_cond->user_list + || !list_count(user_cond->assoc_cond->user_list) || !acct_list || !list_count(acct_list)) { error("we need something to add"); return SLURM_ERROR; @@ -1694,7 +1914,7 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, user_name = pw->pw_name; } - itr = 
list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); itr2 = list_iterator_create(acct_list); while((user = list_next(itr))) { while((acct = list_next(itr2))) { @@ -1734,8 +1954,9 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(query, " on duplicate key update mod_time=%d, deleted=0;%s", now, txn_query); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); xfree(txn_query); @@ -1744,7 +1965,7 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, return rc; } /* get the update list set */ - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((user = list_next(itr))) { user_rec = xmalloc(sizeof(acct_user_rec_t)); user_rec->name = xstrdup(user); @@ -1802,10 +2023,18 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(extra, ", description='%s', organization='%s'", object->description, object->organization); - if(object->qos != ACCT_QOS_NOTSET) { + if(object->qos_list && list_count(object->qos_list)) { + char *qos_val = NULL; + char *tmp_char = NULL; + ListIterator qos_itr = + list_iterator_create(object->qos_list); xstrcat(cols, ", qos"); - xstrfmtcat(vals, ", %u", object->qos); - xstrfmtcat(extra, ", qos=%u", object->qos); + while((tmp_char = list_next(qos_itr))) { + xstrfmtcat(qos_val, ",%s", tmp_char); + } + + xstrfmtcat(vals, ", '%s'", qos_val); + xstrfmtcat(extra, ", qos='%s'", qos_val); } query = xstrdup_printf( @@ -1813,8 +2042,9 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, "on duplicate key update deleted=0, mod_time=%d %s;", acct_table, cols, vals, now, extra); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = 
mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(cols); xfree(vals); xfree(query); @@ -1823,7 +2053,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, xfree(extra); continue; } - affect_rows = _last_affected_rows(mysql_conn->acct_mysql_db); + affect_rows = _last_affected_rows(mysql_conn->db_conn); /* debug3("affected %d", affect_rows); */ if(!affect_rows) { @@ -1857,7 +2087,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); - rc = mysql_db_query(mysql_conn->acct_mysql_db, + rc = mysql_db_query(mysql_conn->db_conn, txn_query); xfree(txn_query); if(rc != SLURM_SUCCESS) { @@ -1934,7 +2164,8 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, object->default_max_cpu_secs_per_job); xstrfmtcat(extra, ", max_cpu_secs_per_job=%u", object->default_max_cpu_secs_per_job); - } else if((int)object->default_max_cpu_secs_per_job == INFINITE) { + } else if((int)object->default_max_cpu_secs_per_job + == INFINITE) { xstrcat(cols, ", max_cpu_secs_per_job"); xstrfmtcat(vals, ", NULL"); xstrfmtcat(extra, ", max_cpu_secs_per_job=NULL"); @@ -1983,8 +2214,9 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, cluster_table, now, now, object->name, now); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add cluster %s", object->name); @@ -1995,7 +2227,7 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, break; } - affect_rows = _last_affected_rows(mysql_conn->acct_mysql_db); + affect_rows = 
_last_affected_rows(mysql_conn->db_conn); if(!affect_rows) { debug2("nothing changed %d", affect_rows); @@ -2020,9 +2252,10 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, xfree(cols); xfree(vals); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { @@ -2039,7 +2272,7 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, now, DBD_ADD_CLUSTERS, object->name, user, extra); xfree(extra); debug4("query\n%s",query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add txn"); @@ -2050,7 +2283,7 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, if(!added) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); } @@ -2203,13 +2436,14 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, } xstrfmtcat(query, - "select distinct %s from %s %s order by lft " + "select distinct %s from %s %s order by lft " "FOR UPDATE;", tmp_char, assoc_table, update); xfree(tmp_char); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); xfree(cols); xfree(vals); @@ -2223,6 +2457,16 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, assoc_id = 0; if(!(row = mysql_fetch_row(result))) { + /* This code speeds up the add process quite a bit + * here we are only doing an update when we are done + * adding to a specific group (cluster/account) other + * than 
that we are adding right behind what we were + * so just total them up and then do one update + * instead of the slow ones that require an update + * every time. There is an incr check outside of the + * loop to catch everything on the last spin of the + * while. + */ if(!old_parent || !old_cluster || strcasecmp(parent, old_parent) || strcasecmp(object->cluster, old_cluster)) { @@ -2251,7 +2495,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, up_query); rc = mysql_db_query( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, up_query); xfree(up_query); if(rc != SLURM_SUCCESS) { @@ -2268,7 +2512,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, sel_query); if(!(sel_result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, sel_query, 0))) { xfree(cols); xfree(vals); @@ -2330,6 +2574,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, /* assoc_table, cols, */ /* vals); */ } else if(!atoi(row[MASSOC_DELETED])) { + /* We don't need to do anything here */ debug("This account was added already"); xfree(cols); xfree(vals); @@ -2338,6 +2583,11 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, xfree(extra); continue; } else { + /* If it was once deleted we have kept the lft + * and rgt's constant while it was deleted and + * so we can just unset the deleted flag, + * check for the parent and move if needed. 
+ */ assoc_id = atoi(row[MASSOC_ID]); if(object->parent_acct && strcasecmp(object->parent_acct, @@ -2368,8 +2618,9 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, xfree(cols); xfree(vals); xfree(update); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add assoc"); @@ -2381,8 +2632,8 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, */ if(!assoc_id) { affect_rows = _last_affected_rows( - mysql_conn->acct_mysql_db); - assoc_id = mysql_insert_id(mysql_conn->acct_mysql_db); + mysql_conn->db_conn); + assoc_id = mysql_insert_id(mysql_conn->db_conn); //info("last id was %d", assoc_id); } @@ -2424,10 +2675,11 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, assoc_table, incr, my_left, assoc_table); - debug3("%d query\n%s", mysql_conn->conn, up_query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, up_query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, up_query); + rc = mysql_db_query(mysql_conn->db_conn, up_query); xfree(up_query); - if(rc != SLURM_SUCCESS) + if(rc != SLURM_SUCCESS) error("Couldn't do update 2"); } @@ -2436,7 +2688,7 @@ end_it: if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); - rc = mysql_db_query(mysql_conn->acct_mysql_db, + rc = mysql_db_query(mysql_conn->db_conn, txn_query); xfree(txn_query); if(rc != SLURM_SUCCESS) { @@ -2447,7 +2699,7 @@ end_it: } else { xfree(txn_query); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); } @@ -2461,8 +2713,98 @@ end_it: #endif } +extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, + List qos_list) +{ +#ifdef HAVE_MYSQL + ListIterator itr = NULL; + int rc = 
SLURM_SUCCESS; + acct_qos_rec_t *object = NULL; + char *query = NULL; + time_t now = time(NULL); + struct passwd *pw = NULL; + char *user = NULL; + int affect_rows = 0; + int added = 0; + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return SLURM_ERROR; + + if((pw=getpwuid(uid))) { + user = pw->pw_name; + } + + itr = list_iterator_create(qos_list); + while((object = list_next(itr))) { + if(!object->name) { + error("We need a qos name to add."); + rc = SLURM_ERROR; + continue; + } + + xstrfmtcat(query, + "insert into %s (creation_time, mod_time, " + "name, description) " + "values (%d, %d, '%s', '%s') " + "on duplicate key update deleted=0, mod_time=%d;", + qos_table, + now, now, object->name, object->description, + now); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + error("Couldn't add qos %s", object->name); + added=0; + break; + } + + affect_rows = _last_affected_rows(mysql_conn->db_conn); + + if(!affect_rows) { + debug2("nothing changed %d", affect_rows); + continue; + } + xstrfmtcat(query, + "insert into %s " + "(timestamp, action, name, actor, info) " + "values (%d, %u, '%s', '%s', \"%s\");", + txn_table, + now, DBD_ADD_QOS, object->name, user, + object->description); + + debug4("query\n%s",query); + rc = mysql_db_query(mysql_conn->db_conn, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + error("Couldn't add txn"); + } else { + if(_addto_update_list(mysql_conn->update_list, + ACCT_ADD_QOS, + object) == SLURM_SUCCESS) + list_remove(itr); + added++; + } + + } + list_iterator_destroy(itr); + + if(!added) { + if(mysql_conn->rollback) { + mysql_db_rollback(mysql_conn->db_conn); + } + list_flush(mysql_conn->update_list); + } + + return rc; +#else + return SLURM_ERROR; +#endif +} + extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, - acct_user_cond_t *user_q, + acct_user_cond_t *user_cond, acct_user_rec_t 
*user) { #ifdef HAVE_MYSQL @@ -2477,8 +2819,9 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, int set = 0; MYSQL_RES *result = NULL; MYSQL_ROW row; + int replace_qos = 0; - if(!user_q) { + if(!user_cond) { error("we need something to change"); return NULL; } @@ -2491,10 +2834,11 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, } xstrcat(extra, "where deleted=0"); - if(user_q->user_list && list_count(user_q->user_list)) { + if(user_cond->assoc_cond && user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2505,10 +2849,10 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->def_acct_list && list_count(user_q->def_acct_list)) { + if(user_cond->def_acct_list && list_count(user_cond->def_acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->def_acct_list); + itr = list_iterator_create(user_cond->def_acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2519,19 +2863,58 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", user_q->qos); + if(user_cond->qos_list && list_count(user_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(user_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } - if(user_q->admin_level != ACCT_ADMIN_NOTSET) { - xstrfmtcat(extra, " && admin_level=%u", 
user_q->admin_level); + if(user_cond->admin_level != ACCT_ADMIN_NOTSET) { + xstrfmtcat(extra, " && admin_level=%u", user_cond->admin_level); } if(user->default_acct) xstrfmtcat(vals, ", default_acct='%s'", user->default_acct); - if(user->qos != ACCT_QOS_NOTSET) - xstrfmtcat(vals, ", qos=%u", user->qos); + if(user->qos_list && list_count(user->qos_list)) { + char *tmp_qos = NULL; + set = 0; + itr = list_iterator_create(user->qos_list); + while((object = list_next(itr))) { + /* when adding we need to make sure we don't + * already have it so we remove it and then add + * it. + */ + if(object[0] == '-') { + xstrfmtcat(vals, + ", qos=replace(qos, ',%s', '')", + object+1); + } else if(object[0] == '+') { + xstrfmtcat(vals, + ", qos=concat(" + "replace(qos, ',%s', ''), ',%s')", + object+1, object+1); + } else { + xstrfmtcat(tmp_qos, ",%s", object); + } + } + list_iterator_destroy(itr); + if(tmp_qos) { + xstrfmtcat(vals, ", qos='%s'", tmp_qos); + xfree(tmp_qos); + replace_qos = 1; + } + } if(user->admin_level != ACCT_ADMIN_NOTSET) xstrfmtcat(vals, ", admin_level=%u", user->admin_level); @@ -2541,10 +2924,11 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, error("Nothing to change"); return NULL; } - query = xstrdup_printf("select name from %s %s;", user_table, extra); + query = xstrdup_printf("select name, qos from %s %s;", + user_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -2552,6 +2936,8 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, rc = 0; ret_list = list_create(slurm_destroy_char); while((row = mysql_fetch_row(result))) { + acct_user_rec_t *user_rec = NULL; + object = xstrdup(row[0]); list_append(ret_list, object); if(!rc) { @@ -2560,6 +2946,62 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, } else { xstrfmtcat(name_char, " || 
name='%s'", object); } + user_rec = xmalloc(sizeof(acct_user_rec_t)); + user_rec->name = xstrdup(object); + user_rec->default_acct = xstrdup(user->default_acct); + user_rec->admin_level = user->admin_level; + if(user->qos_list) { + ListIterator new_qos_itr = + list_iterator_create(user->qos_list); + ListIterator curr_qos_itr = NULL; + char *new_qos = NULL, *curr_qos = NULL; + + user_rec->qos_list = list_create(slurm_destroy_char); + if(!replace_qos) + slurm_addto_char_list(user_rec->qos_list, + row[1]); + curr_qos_itr = list_iterator_create(user_rec->qos_list); + + while((new_qos = list_next(new_qos_itr))) { + char *tmp_char = NULL; + if(new_qos[0] == '-') { + tmp_char = xstrdup(new_qos+1); /* requested qos minus the leading '-'; "object" is the user name here */ + while((curr_qos = + list_next(curr_qos_itr))) { + if(!strcmp(curr_qos, + tmp_char)) { + list_delete_item( + curr_qos_itr); + break; + } + } + xfree(tmp_char); + list_iterator_reset(curr_qos_itr); + } else if(new_qos[0] == '+') { + tmp_char = xstrdup(new_qos+1); /* requested qos minus the leading '+' */ + while((curr_qos = + list_next(curr_qos_itr))) { + if(!strcmp(curr_qos, + tmp_char)) { + break; + } + } + if(!curr_qos) + list_append(user_rec->qos_list, + tmp_char); + else + xfree(tmp_char); + list_iterator_reset(curr_qos_itr); + } else { + list_append(user_rec->qos_list, + xstrdup(new_qos)); /* plain entry: take the requested qos as given */ + } + } + list_iterator_destroy(curr_qos_itr); + list_iterator_destroy(new_qos_itr); + } + _addto_update_list(mysql_conn->update_list, ACCT_MODIFY_USER, + user_rec); } mysql_free_result(result); @@ -2592,7 +3034,7 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, extern List acct_storage_p_modify_accounts( mysql_conn_t *mysql_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct) { #ifdef HAVE_MYSQL @@ -2608,7 +3050,7 @@ extern List acct_storage_p_modify_accounts( MYSQL_RES *result = NULL; MYSQL_ROW row; - if(!acct_q) { + if(!acct_cond) { error("we need something to change"); return NULL; } @@ -2621,10 +3063,12 @@ extern List 
acct_storage_p_modify_accounts( } xstrcat(extra, "where deleted=0"); - if(acct_q->acct_list && list_count(acct_q->acct_list)) { + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->acct_list); + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2635,10 +3079,11 @@ extern List acct_storage_p_modify_accounts( xstrcat(extra, ")"); } - if(acct_q->description_list && list_count(acct_q->description_list)) { + if(acct_cond->description_list + && list_count(acct_cond->description_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->description_list); + itr = list_iterator_create(acct_cond->description_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2649,10 +3094,11 @@ extern List acct_storage_p_modify_accounts( xstrcat(extra, ")"); } - if(acct_q->organization_list && list_count(acct_q->organization_list)) { + if(acct_cond->organization_list + && list_count(acct_cond->organization_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->organization_list); + itr = list_iterator_create(acct_cond->organization_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2663,16 +3109,55 @@ extern List acct_storage_p_modify_accounts( xstrcat(extra, ")"); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", acct_q->qos); + if(acct_cond->qos_list && list_count(acct_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(acct_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } if(acct->description) xstrfmtcat(vals, ", 
description='%s'", acct->description); if(acct->organization) - xstrfmtcat(vals, ", organization='%u'", acct->organization); - if(acct->qos != ACCT_QOS_NOTSET) - xstrfmtcat(vals, ", qos='%u'", acct->qos); + xstrfmtcat(vals, ", organization='%s'", acct->organization); + + if(acct->qos_list && list_count(acct->qos_list)) { + char *tmp_qos = NULL; + set = 0; + itr = list_iterator_create(acct->qos_list); + while((object = list_next(itr))) { + /* when adding we need to make sure we don't + * already have it so we remove it and then add + * it. + */ + if(object[0] == '-') { + xstrfmtcat(vals, + ", qos=replace(qos, ',%s', '')", + object+1); + } else if(object[0] == '+') { + xstrfmtcat(vals, + ", qos=concat(" + "replace(qos, ',%s', ''), ',%s')", + object+1, object+1); + } else { + xstrfmtcat(tmp_qos, ",%s", object); + } + } + list_iterator_destroy(itr); + if(tmp_qos) { + xstrfmtcat(vals, ", qos='%s'", tmp_qos); + xfree(tmp_qos); + } + } if(!extra || !vals) { errno = SLURM_NO_CHANGE_IN_DATA; @@ -2682,9 +3167,9 @@ extern List acct_storage_p_modify_accounts( query = xstrdup_printf("select name from %s %s;", acct_table, extra); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); xfree(vals); return NULL; @@ -2735,7 +3220,7 @@ extern List acct_storage_p_modify_accounts( extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, + acct_cluster_cond_t *cluster_cond, acct_cluster_rec_t *cluster) { #ifdef HAVE_MYSQL @@ -2757,7 +3242,7 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, * the controller when it loads */ - if(!cluster_q) { + if(!cluster_cond) { error("we need something to change"); return NULL; } @@ -2770,10 +3255,11 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, 
} xstrcat(extra, "where deleted=0"); - if(cluster_q->cluster_list && list_count(cluster_q->cluster_list)) { + if(cluster_cond->cluster_list + && list_count(cluster_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(cluster_q->cluster_list); + itr = list_iterator_create(cluster_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2800,9 +3286,9 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, xstrfmtcat(query, "select name from %s %s;", cluster_table, extra); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); xfree(vals); error("no result given for %s", extra); @@ -2856,10 +3342,10 @@ end_it: #endif } -extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, - uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_p_modify_associations( + mysql_conn_t *mysql_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -2898,7 +3384,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, MASSOC_COUNT }; - if(!assoc_q) { + if(!assoc_cond) { error("we need something to change"); return NULL; } @@ -2949,10 +3435,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, user_name = pw->pw_name; } - if(assoc_q->acct_list && list_count(assoc_q->acct_list)) { + if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->acct_list); + itr = list_iterator_create(assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2963,10 +3449,10 @@ extern List 
acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->cluster_list && list_count(assoc_q->cluster_list)) { + if(assoc_cond->cluster_list && list_count(assoc_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->cluster_list); + itr = list_iterator_create(assoc_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2977,10 +3463,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->user_list && list_count(assoc_q->user_list)) { + if(assoc_cond->user_list && list_count(assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->user_list); + itr = list_iterator_create(assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2994,10 +3480,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, " && user = '' "); } - if(assoc_q->id_list && list_count(assoc_q->id_list)) { + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->id_list); + itr = list_iterator_create(assoc_cond->id_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3008,8 +3494,9 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->parent_acct) { - xstrfmtcat(extra, " && parent_acct='%s'", assoc_q->parent_acct); + if(assoc_cond->parent_acct) { + xstrfmtcat(extra, " && parent_acct='%s'", + assoc_cond->parent_acct); } if((int)assoc->fairshare >= 0) @@ -3059,9 +3546,9 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xfree(object); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 
0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3100,7 +3587,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, error("We are here with no coord accts."); if(mysql_conn->rollback) { mysql_db_rollback( - mysql_conn->acct_mysql_db); + mysql_conn->db_conn); } errno = ESLURM_ACCESS_DENIED; mysql_free_result(result); @@ -3133,7 +3620,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, if(mysql_conn->rollback) { mysql_db_rollback( - mysql_conn->acct_mysql_db); + mysql_conn->db_conn); } errno = ESLURM_ACCESS_DENIED; mysql_free_result(result); @@ -3225,7 +3712,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, if(assoc->parent_acct) { if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); list_destroy(ret_list); @@ -3238,7 +3725,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, if(!list_count(ret_list)) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } errno = SLURM_NO_CHANGE_IN_DATA; debug3("didn't effect anything"); @@ -3252,7 +3739,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, user_name, assoc_table, name_char, vals) == SLURM_ERROR) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); error("Couldn't modify associations"); @@ -3273,7 +3760,7 @@ end_it: } extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3289,8 +3776,8 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, MYSQL_RES *result = NULL; MYSQL_ROW row; - if(!user_q) { - error("we need 
something to change"); + if(!user_cond) { + error("we need something to remove"); return NULL; } @@ -3303,10 +3790,11 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, "where deleted=0"); - if(user_q->user_list && list_count(user_q->user_list)) { + if(user_cond->assoc_cond && user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3317,10 +3805,10 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->def_acct_list && list_count(user_q->def_acct_list)) { + if(user_cond->def_acct_list && list_count(user_cond->def_acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->def_acct_list); + itr = list_iterator_create(user_cond->def_acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3331,12 +3819,24 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", user_q->qos); + if(user_cond->qos_list && list_count(user_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(user_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } - if(user_q->admin_level != ACCT_ADMIN_NOTSET) { - xstrfmtcat(extra, " && admin_level=%u", user_q->admin_level); + if(user_cond->admin_level != ACCT_ADMIN_NOTSET) { + xstrfmtcat(extra, " && admin_level=%u", user_cond->admin_level); } if(!extra) { @@ -3347,7 +3847,7 @@ extern List 
acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, query = xstrdup_printf("select name from %s %s;", user_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3356,6 +3856,8 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, ret_list = list_create(slurm_destroy_char); while((row = mysql_fetch_row(result))) { char *object = xstrdup(row[0]); + acct_user_rec_t *user_rec = NULL; + list_append(ret_list, object); if(!rc) { xstrfmtcat(name_char, "name='%s'", object); @@ -3365,6 +3867,11 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(name_char, " || name='%s'", object); xstrfmtcat(assoc_char, " || t2.user='%s'", object); } + user_rec = xmalloc(sizeof(acct_user_rec_t)); + user_rec->name = xstrdup(object); + _addto_update_list(mysql_conn->update_list, ACCT_REMOVE_USER, + user_rec); + } mysql_free_result(result); @@ -3391,7 +3898,7 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, acct_coord_table, now, assoc_char); xfree(assoc_char); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't remove user coordinators"); @@ -3408,7 +3915,7 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL char *query = NULL, *object = NULL, *extra = NULL, *last_user = NULL; @@ -3424,6 +3931,11 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, MYSQL_ROW row; acct_user_rec_t user; + if(!user_cond) { + error("we need something to remove"); + return NULL; + } + if(_check_connection(mysql_conn) != SLURM_SUCCESS) 
return NULL; @@ -3470,14 +3982,17 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, user_name = pw->pw_name; } - if(user_q->user_list && list_count(user_q->user_list)) { + /* Leave it this way since we are using extra below */ + + if(user_cond->assoc_cond && user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; if(extra) xstrcat(extra, " && ("); else xstrcat(extra, " ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3505,15 +4020,23 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, list_iterator_destroy(itr); xstrcat(extra, ")"); } + + if(!extra) { + errno = SLURM_ERROR; + debug3("No conditions given"); + return NULL; + } + query = xstrdup_printf( "select user, acct from %s where deleted=0 && %s order by user", acct_coord_table, extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); xfree(extra); + errno = SLURM_ERROR; return NULL; } xfree(query); @@ -3566,6 +4089,7 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, list_destroy(ret_list); list_destroy(user_list); xfree(extra); + errno = SLURM_ERROR; return NULL; } xfree(extra); @@ -3588,7 +4112,7 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, } extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3604,7 +4128,7 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, MYSQL_RES *result = NULL; MYSQL_ROW row; - if(!acct_q) { + 
if(!acct_cond) { error("we need something to change"); return NULL; } @@ -3617,10 +4141,12 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, return NULL; xstrcat(extra, "where deleted=0"); - if(acct_q->acct_list && list_count(acct_q->acct_list)) { + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->acct_list); + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3631,10 +4157,11 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(acct_q->description_list && list_count(acct_q->description_list)) { + if(acct_cond->description_list + && list_count(acct_cond->description_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->description_list); + itr = list_iterator_create(acct_cond->description_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3645,10 +4172,11 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(acct_q->organization_list && list_count(acct_q->organization_list)) { + if(acct_cond->organization_list + && list_count(acct_cond->organization_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->organization_list); + itr = list_iterator_create(acct_cond->organization_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3659,10 +4187,22 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", acct_q->qos); - } - + if(acct_cond->qos_list && list_count(acct_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(acct_cond->qos_list); + 
while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(!extra) { error("Nothing to remove"); return NULL; @@ -3671,7 +4211,7 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, query = xstrdup_printf("select name from %s %s;", acct_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3719,7 +4259,7 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3736,7 +4276,7 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, MYSQL_ROW row; int day_old = now - DELETE_SEC_BACK; - if(!cluster_q) { + if(!cluster_cond) { error("we need something to change"); return NULL; } @@ -3748,10 +4288,11 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, user_name = pw->pw_name; } xstrcat(extra, "where deleted=0"); - if(cluster_q->cluster_list && list_count(cluster_q->cluster_list)) { + if(cluster_cond->cluster_list + && list_count(cluster_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(cluster_q->cluster_list); + itr = list_iterator_create(cluster_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3770,7 +4311,7 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, query = xstrdup_printf("select name from %s %s;", cluster_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ 
-3815,12 +4356,12 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, cluster_hour_table, now, assoc_char, cluster_month_table, now, assoc_char); xfree(assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); list_destroy(ret_list); @@ -3849,9 +4390,9 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, #endif } -extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, - uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_p_remove_associations( + mysql_conn_t *mysql_conn, uint32_t uid, + acct_association_cond_t *assoc_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3889,7 +4430,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, RASSOC_COUNT }; - if(!assoc_q) { + if(!assoc_cond) { error("we need something to change"); return NULL; } @@ -3942,10 +4483,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, user_name = pw->pw_name; } - if(assoc_q->acct_list && list_count(assoc_q->acct_list)) { + if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->acct_list); + itr = list_iterator_create(assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3956,10 +4497,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->cluster_list && list_count(assoc_q->cluster_list)) { + if(assoc_cond->cluster_list && list_count(assoc_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = 
list_iterator_create(assoc_q->cluster_list); + itr = list_iterator_create(assoc_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3970,10 +4511,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->user_list && list_count(assoc_q->user_list)) { + if(assoc_cond->user_list && list_count(assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->user_list); + itr = list_iterator_create(assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3984,10 +4525,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->id_list && list_count(assoc_q->id_list)) { + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->id_list); + itr = list_iterator_create(assoc_cond->id_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3998,9 +4539,9 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->parent_acct) { + if(assoc_cond->parent_acct) { xstrfmtcat(extra, " && parent_acct='%s'", - assoc_q->parent_acct); + assoc_cond->parent_acct); } for(i=0; i<RASSOC_COUNT; i++) { @@ -4014,7 +4555,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, assoc_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4034,7 +4575,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, if(!name_char) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); errno = SLURM_NO_CHANGE_IN_DATA; @@ -4050,11 +4591,11 @@ extern List 
acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, assoc_table, name_char); xfree(extra); xfree(object); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); xfree(query); @@ -4144,7 +4685,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, return ret_list; end_it: if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); @@ -4160,8 +4701,143 @@ end_it: #endif } +extern List acct_storage_p_remove_qos(mysql_conn_t *mysql_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ +#ifdef HAVE_MYSQL + ListIterator itr = NULL; + List ret_list = NULL; + int rc = SLURM_SUCCESS; + char *object = NULL; + char *extra = NULL, *query = NULL, + *name_char = NULL, *assoc_char = NULL; + time_t now = time(NULL); + struct passwd *pw = NULL; + char *user_name = NULL; + int set = 0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + + if(!qos_cond) { + error("we need something to change"); + return NULL; + } + + if((pw=getpwuid(uid))) { + user_name = pw->pw_name; + } + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return NULL; + + xstrcat(extra, "where deleted=0"); + if(qos_cond->description_list + && list_count(qos_cond->description_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->description_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "description='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->id_list + && list_count(qos_cond->id_list)) { + set = 0; + xstrcat(extra, " && ("); 
+ itr = list_iterator_create(qos_cond->id_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "id='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->name_list + && list_count(qos_cond->name_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->name_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "name='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(!extra) { + error("Nothing to remove"); + return NULL; + } + + query = xstrdup_printf("select id from %s %s;", qos_table, extra); + xfree(extra); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return NULL; + } + + rc = 0; + ret_list = list_create(slurm_destroy_char); + while((row = mysql_fetch_row(result))) { + char *object = xstrdup(row[0]); + acct_qos_rec_t *qos_rec = NULL; + + list_append(ret_list, object); + if(!rc) { + xstrfmtcat(name_char, "id='%s'", object); + xstrfmtcat(assoc_char, "qos=replace(qos, ',%s', '')", + object); + rc = 1; + } else { + xstrfmtcat(name_char, " || id='%s'", object); + xstrfmtcat(assoc_char, ", qos=replace(qos, ',%s', '')", + object); + } + qos_rec = xmalloc(sizeof(acct_qos_rec_t)); + qos_rec->name = xstrdup(object); + _addto_update_list(mysql_conn->update_list, ACCT_REMOVE_QOS, + qos_rec); + } + mysql_free_result(result); + + if(!list_count(ret_list)) { + errno = SLURM_NO_CHANGE_IN_DATA; + debug3("didn't effect anything\n%s", query); + xfree(query); + return ret_list; + } + xfree(query); + + if(_remove_common(mysql_conn, DBD_REMOVE_ACCOUNTS, now, + user_name, qos_table, name_char, assoc_char) + == SLURM_ERROR) { + list_destroy(ret_list); + xfree(name_char); + return NULL; + } + xfree(name_char); + + return ret_list; +#else + return NULL; +#endif +} + extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, - 
acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4185,7 +4861,7 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, enum { USER_REQ_NAME, USER_REQ_DA, - USER_REQ_EX, + USER_REQ_QOS, USER_REQ_AL, USER_REQ_COUNT }; @@ -4195,21 +4871,23 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, - if(!user_q) { + if(!user_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(user_q->with_deleted) + if(user_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(user_q->user_list && list_count(user_q->user_list)) { + if(user_cond->assoc_cond && + user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4220,10 +4898,10 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(user_q->def_acct_list && list_count(user_q->def_acct_list)) { + if(user_cond->def_acct_list && list_count(user_cond->def_acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->def_acct_list); + itr = list_iterator_create(user_cond->def_acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4234,22 +4912,25 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(user_q->qos != ACCT_QOS_NOTSET) { - if(extra) - xstrfmtcat(extra, " && qos=%u", user_q->qos); - else - xstrfmtcat(extra, " where qos=%u", - user_q->qos); - + if(user_cond->qos_list && list_count(user_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(user_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like 
'%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } - if(user_q->admin_level != ACCT_ADMIN_NOTSET) { - if(extra) - xstrfmtcat(extra, " && admin_level=%u", - user_q->admin_level); - else - xstrfmtcat(extra, " where admin_level=%u", - user_q->admin_level); + if(user_cond->admin_level != ACCT_ADMIN_NOTSET) { + xstrfmtcat(extra, " && admin_level=%u", + user_cond->admin_level); } empty: @@ -4263,9 +4944,9 @@ empty: xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4281,7 +4962,11 @@ empty: user->name = xstrdup(row[USER_REQ_NAME]); user->default_acct = xstrdup(row[USER_REQ_DA]); user->admin_level = atoi(row[USER_REQ_AL]); - user->qos = atoi(row[USER_REQ_EX]); + if(row[USER_REQ_QOS] && row[USER_REQ_QOS][0]) { + user->qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(user->qos_list, + row[USER_REQ_QOS]); + } /* user id will be set on the client since this could be on a * different machine where this user may not exist or @@ -4292,26 +4977,26 @@ empty: /* user->uid = passwd_ptr->pw_uid; */ /* else */ /* user->uid = (uint32_t)NO_VAL; */ - if(user_q && user_q->with_coords) { + if(user_cond && user_cond->with_coords) { _get_user_coords(mysql_conn, user); } - if(user_q && user_q->with_assocs) { - acct_association_cond_t *assoc_q = NULL; - if(!user_q->assoc_cond) { - user_q->assoc_cond = xmalloc( + if(user_cond && user_cond->with_assocs) { + acct_association_cond_t *assoc_cond = NULL; + if(!user_cond->assoc_cond) { + user_cond->assoc_cond = xmalloc( sizeof(acct_association_cond_t)); } - assoc_q = user_q->assoc_cond; - if(assoc_q->user_list) - list_destroy(assoc_q->user_list); + assoc_cond = user_cond->assoc_cond; + if(assoc_cond->user_list) + 
list_destroy(assoc_cond->user_list); - assoc_q->user_list = list_create(NULL); - list_append(assoc_q->user_list, user->name); + assoc_cond->user_list = list_create(NULL); + list_append(assoc_cond->user_list, user->name); user->assoc_list = acct_storage_p_get_associations( - mysql_conn, assoc_q); - list_destroy(assoc_q->user_list); - assoc_q->user_list = NULL; + mysql_conn, assoc_cond); + list_destroy(assoc_cond->user_list); + assoc_cond->user_list = NULL; } } mysql_free_result(result); @@ -4323,7 +5008,7 @@ empty: } extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4356,20 +5041,22 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, return NULL; - if(!acct_q) { + if(!acct_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(acct_q->with_deleted) + if(acct_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(acct_q->acct_list && list_count(acct_q->acct_list)) { + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->acct_list); + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4380,10 +5067,11 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(acct_q->description_list && list_count(acct_q->description_list)) { + if(acct_cond->description_list + && list_count(acct_cond->description_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->description_list); + itr = list_iterator_create(acct_cond->description_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4394,10 +5082,11 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); 
} - if(acct_q->organization_list && list_count(acct_q->organization_list)) { + if(acct_cond->organization_list + && list_count(acct_cond->organization_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->organization_list); + itr = list_iterator_create(acct_cond->organization_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4408,12 +5097,20 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - if(extra) - xstrfmtcat(extra, " && qos=%u", acct_q->qos); - else - xstrfmtcat(extra, " where qos=%u", - acct_q->qos); + if(acct_cond->qos_list && list_count(acct_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(acct_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } empty: @@ -4428,9 +5125,9 @@ empty: xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4445,28 +5142,32 @@ empty: acct->name = xstrdup(row[ACCT_REQ_NAME]); acct->description = xstrdup(row[ACCT_REQ_DESC]); acct->organization = xstrdup(row[ACCT_REQ_ORG]); - acct->qos = atoi(row[ACCT_REQ_QOS]); + if(row[ACCT_REQ_QOS] && row[ACCT_REQ_QOS][0]) { + acct->qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(acct->qos_list, + row[ACCT_REQ_QOS]); + } - if(acct_q && acct_q->with_coords) { + if(acct_cond && acct_cond->with_coords) { _get_account_coords(mysql_conn, acct); } - if(acct_q && acct_q->with_assocs) { - acct_association_cond_t *assoc_q = NULL; - if(!acct_q->assoc_cond) { - acct_q->assoc_cond = xmalloc( + 
if(acct_cond && acct_cond->with_assocs) { + acct_association_cond_t *assoc_cond = NULL; + if(!acct_cond->assoc_cond) { + acct_cond->assoc_cond = xmalloc( sizeof(acct_association_cond_t)); } - assoc_q = acct_q->assoc_cond; - if(assoc_q->acct_list) - list_destroy(assoc_q->acct_list); + assoc_cond = acct_cond->assoc_cond; + if(assoc_cond->acct_list) + list_destroy(assoc_cond->acct_list); - assoc_q->acct_list = list_create(NULL); - list_append(assoc_q->acct_list, acct->name); + assoc_cond->acct_list = list_create(NULL); + list_append(assoc_cond->acct_list, acct->name); acct->assoc_list = acct_storage_p_get_associations( - mysql_conn, assoc_q); - list_destroy(assoc_q->acct_list); - assoc_q->acct_list = NULL; + mysql_conn, assoc_cond); + list_destroy(assoc_cond->acct_list); + assoc_cond->acct_list = NULL; } } @@ -4479,7 +5180,7 @@ empty: } extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4525,20 +5226,20 @@ extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn, return NULL; - if(!cluster_q) { + if(!cluster_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(cluster_q->with_deleted) + if(cluster_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(cluster_q->cluster_list && list_count(cluster_q->cluster_list)) { + if(cluster_cond->cluster_list && list_count(cluster_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(cluster_q->cluster_list); + itr = list_iterator_create(cluster_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4563,9 +5264,9 @@ empty: xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + 
mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4589,10 +5290,11 @@ empty: cluster->name = xstrdup(row[CLUSTER_REQ_NAME]); /* get the usage if requested */ - if(cluster_q->with_usage) { - clusteracct_storage_p_get_usage(mysql_conn, cluster, - cluster_q->usage_start, - cluster_q->usage_end); + if(cluster_cond->with_usage) { + clusteracct_storage_p_get_usage( + mysql_conn, cluster, + cluster_cond->usage_start, + cluster_cond->usage_end); } cluster->control_host = xstrdup(row[CLUSTER_REQ_CH]); @@ -4600,7 +5302,7 @@ empty: query = xstrdup_printf("select %s from %s where cluster='%s' " "&& acct='root'", tmp, assoc_table, cluster->name); - if(!(result2 = mysql_db_query_ret(mysql_conn->acct_mysql_db, + if(!(result2 = mysql_db_query_ret(mysql_conn->db_conn, query, 1))) { xfree(query); break; @@ -4608,29 +5310,29 @@ empty: xfree(query); row2 = mysql_fetch_row(result2); - if(row2[ASSOC_REQ_FS]) + if(row2 && row2[ASSOC_REQ_FS]) cluster->default_fairshare = atoi(row2[ASSOC_REQ_FS]); else cluster->default_fairshare = 1; - if(row2[ASSOC_REQ_MJ]) + if(row2 && row2[ASSOC_REQ_MJ]) cluster->default_max_jobs = atoi(row2[ASSOC_REQ_MJ]); else cluster->default_max_jobs = INFINITE; - if(row2[ASSOC_REQ_MNPJ]) + if(row2 && row2[ASSOC_REQ_MNPJ]) cluster->default_max_nodes_per_job = atoi(row2[ASSOC_REQ_MNPJ]); else cluster->default_max_nodes_per_job = INFINITE; - if(row2[ASSOC_REQ_MWPJ]) + if(row2 && row2[ASSOC_REQ_MWPJ]) cluster->default_max_wall_duration_per_job = atoi(row2[ASSOC_REQ_MWPJ]); else cluster->default_max_wall_duration_per_job = INFINITE; - if(row2[ASSOC_REQ_MCPJ]) + if(row2 && row2[ASSOC_REQ_MCPJ]) cluster->default_max_cpu_secs_per_job = atoi(row2[ASSOC_REQ_MCPJ]); else @@ -4647,7 +5349,7 @@ empty: } extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4671,6 +5373,11 @@ extern List 
acct_storage_p_get_associations(mysql_conn_t *mysql_conn, uint32_t user_parent_id = 0; uint32_t acct_parent_id = 0; + /* needed if we don't have an assoc_cond */ + uint16_t without_parent_info = 0; + uint16_t without_parent_limits = 0; + uint16_t with_usage = 0; + /* if this changes you will need to edit the corresponding enum */ char *assoc_req_inx[] = { "id", @@ -4715,20 +5422,20 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, return NULL; - if(!assoc_q) { + if(!assoc_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(assoc_q->with_deleted) + if(assoc_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(assoc_q->acct_list && list_count(assoc_q->acct_list)) { + if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->acct_list); + itr = list_iterator_create(assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4739,10 +5446,10 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->cluster_list && list_count(assoc_q->cluster_list)) { + if(assoc_cond->cluster_list && list_count(assoc_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->cluster_list); + itr = list_iterator_create(assoc_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4753,10 +5460,10 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->user_list && list_count(assoc_q->user_list)) { + if(assoc_cond->user_list && list_count(assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->user_list); + itr = list_iterator_create(assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4767,10 +5474,10 @@ extern List 
acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->id_list && list_count(assoc_q->id_list)) { + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->id_list); + itr = list_iterator_create(assoc_cond->id_list); while((object = list_next(itr))) { char *ptr = NULL; long num = strtol(object, &ptr, 10); @@ -4791,9 +5498,14 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->parent_acct) { - xstrfmtcat(extra, " && parent_acct='%s'", assoc_q->parent_acct); + if(assoc_cond->parent_acct) { + xstrfmtcat(extra, " && parent_acct='%s'", + assoc_cond->parent_acct); } + + with_usage = assoc_cond->with_usage; + without_parent_limits = assoc_cond->without_parent_limits; + without_parent_info = assoc_cond->without_parent_info; empty: xfree(tmp); xstrfmtcat(tmp, "%s", assoc_req_inx[i]); @@ -4805,9 +5517,9 @@ empty: tmp, assoc_table, extra); xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4828,10 +5540,10 @@ empty: assoc->rgt = atoi(row[ASSOC_REQ_RGT]); /* get the usage if requested */ - if(assoc_q->with_usage) { + if(with_usage) { acct_storage_p_get_usage(mysql_conn, assoc, - assoc_q->usage_start, - assoc_q->usage_end); + assoc_cond->usage_start, + assoc_cond->usage_end); } if(row[ASSOC_REQ_USER][0]) @@ -4839,7 +5551,8 @@ empty: assoc->acct = xstrdup(row[ASSOC_REQ_ACCT]); assoc->cluster = xstrdup(row[ASSOC_REQ_CLUSTER]); - if(!assoc_q->without_parent_info && row[ASSOC_REQ_PARENT][0]) { + if(!without_parent_info + && row[ASSOC_REQ_PARENT][0]) { /* info("got %s?=%s and %s?=%s", */ /* row[ASSOC_REQ_PARENT], last_acct_parent, */ /* row[ASSOC_REQ_CLUSTER], last_cluster); */ 
@@ -4852,10 +5565,11 @@ empty: "and cluster='%s';", assoc_table, row[ASSOC_REQ_PARENT], row[ASSOC_REQ_CLUSTER]); - debug4("%d query\n%s", mysql_conn->conn, query); + debug4("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, query, 1))) { xfree(query); break; @@ -4886,10 +5600,10 @@ empty: "select @par_id, @mj, @mnpj, @mwpj, @mcpj;", assoc_table, row[ASSOC_REQ_ACCT], row[ASSOC_REQ_CLUSTER], - assoc_q->without_parent_limits); + without_parent_limits); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 1))) { + mysql_conn->db_conn, query, 1))) { xfree(query); break; } @@ -4897,24 +5611,27 @@ empty: row2 = mysql_fetch_row(result2); user_parent_id = atoi(row2[ASSOC2_REQ_PARENT_ID]); - if(!assoc_q->without_parent_limits) { + if(!without_parent_limits) { if(row2[ASSOC2_REQ_MJ]) parent_mj = atoi(row2[ASSOC2_REQ_MJ]); else parent_mj = INFINITE; if(row2[ASSOC2_REQ_MNPJ]) - parent_mnpj = atoi(row2[ASSOC2_REQ_MNPJ]); + parent_mnpj = + atoi(row2[ASSOC2_REQ_MNPJ]); else parent_mwpj = INFINITE; if(row2[ASSOC2_REQ_MWPJ]) - parent_mwpj = atoi(row2[ASSOC2_REQ_MWPJ]); + parent_mwpj = + atoi(row2[ASSOC2_REQ_MWPJ]); else parent_mwpj = INFINITE; if(row2[ASSOC2_REQ_MCPJ]) - parent_mcpj = atoi(row2[ASSOC2_REQ_MCPJ]); + parent_mcpj = + atoi(row2[ASSOC2_REQ_MCPJ]); else parent_mcpj = INFINITE; } @@ -4957,6 +5674,302 @@ empty: #endif } +extern List acct_storage_p_get_qos(mysql_conn_t *mysql_conn, + acct_qos_cond_t *qos_cond) +{ +#ifdef HAVE_MYSQL + char *query = NULL; + char *extra = NULL; + char *tmp = NULL; + List qos_list = NULL; + ListIterator itr = NULL; + char *object = NULL; + int set = 0; + int i=0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + + /* if this changes you will need to edit the corresponding enum */ + char *qos_req_inx[] = { + "name", + "description", + "id" + }; + enum { + QOS_REQ_NAME, + QOS_REQ_DESC, + QOS_REQ_ID, + QOS_REQ_COUNT + }; + + 
if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return NULL; + + + + if(!qos_cond) { + xstrcat(extra, "where deleted=0"); + goto empty; + } + + if(qos_cond->with_deleted) + xstrcat(extra, "where (deleted=0 || deleted=1)"); + else + xstrcat(extra, "where deleted=0"); + + + if(qos_cond->description_list + && list_count(qos_cond->description_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->description_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "description='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->id_list + && list_count(qos_cond->id_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->id_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "id='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->name_list + && list_count(qos_cond->name_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->name_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "name='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + +empty: + + xfree(tmp); + xstrfmtcat(tmp, "%s", qos_req_inx[i]); + for(i=1; i<QOS_REQ_COUNT; i++) { + xstrfmtcat(tmp, ", %s", qos_req_inx[i]); + } + + query = xstrdup_printf("select %s from %s %s", tmp, qos_table, extra); + xfree(tmp); + xfree(extra); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return NULL; + } + xfree(query); + + qos_list = list_create(destroy_acct_qos_rec); + + while((row = mysql_fetch_row(result))) { + acct_qos_rec_t *qos = xmalloc(sizeof(acct_qos_rec_t)); + list_append(qos_list, qos); + + qos->description = 
xstrdup(row[QOS_REQ_DESC]); + qos->id = atoi(row[QOS_REQ_ID]); + qos->name = xstrdup(row[QOS_REQ_NAME]); + } + mysql_free_result(result); + + return qos_list; +#else + return NULL; +#endif +} + +extern List acct_storage_p_get_txn(mysql_conn_t *mysql_conn, + acct_txn_cond_t *txn_cond) +{ +#ifdef HAVE_MYSQL + char *query = NULL; + char *extra = NULL; + char *tmp = NULL; + List txn_list = NULL; + ListIterator itr = NULL; + char *object = NULL; + int set = 0; + int i=0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + + /* if this changes you will need to edit the corresponding enum */ + char *txn_req_inx[] = { + "id", + "timestamp", + "action", + "name", + "actor", + "info" + }; + enum { + TXN_REQ_ID, + TXN_REQ_TS, + TXN_REQ_ACTION, + TXN_REQ_NAME, + TXN_REQ_ACTOR, + TXN_REQ_INFO, + TXN_REQ_COUNT + }; + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return NULL; + + if(!txn_cond) + goto empty; + + if(txn_cond->action_list && list_count(txn_cond->action_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + itr = list_iterator_create(txn_cond->action_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "action='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(txn_cond->actor_list && list_count(txn_cond->actor_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + itr = list_iterator_create(txn_cond->actor_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "actor='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(txn_cond->id_list && list_count(txn_cond->id_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + itr = list_iterator_create(txn_cond->id_list); + while((object = list_next(itr))) { + char *ptr = NULL; + long num = strtol(object, &ptr, 10); + if 
((num == 0) && ptr && ptr[0]) { + error("Invalid value for txn id (%s)", + object); + xfree(extra); + list_iterator_destroy(itr); + return NULL; + } + + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "id=%s", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(txn_cond->time_start && txn_cond->time_end) { + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + xstrfmtcat(extra, "timestamp < %d && timestamp >= %d)", + txn_cond->time_end, txn_cond->time_start); + } else if(txn_cond->time_start) { + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + xstrfmtcat(extra, "timestamp >= %d)", txn_cond->time_start); + + } else if(txn_cond->time_end) { + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + xstrfmtcat(extra, "timestamp < %d)", txn_cond->time_end); + } +empty: + xfree(tmp); + xstrfmtcat(tmp, "%s", txn_req_inx[i]); + for(i=1; i<TXN_REQ_COUNT; i++) { + xstrfmtcat(tmp, ", %s", txn_req_inx[i]); + } + + query = xstrdup_printf("select %s from %s", tmp, txn_table); + + if(extra) { + xstrfmtcat(query, "%s", extra); + xfree(extra); + } + xstrcat(query, " order by timestamp;"); + + xfree(tmp); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return NULL; + } + xfree(query); + + txn_list = list_create(destroy_acct_txn_rec); + + while((row = mysql_fetch_row(result))) { + acct_txn_rec_t *txn = xmalloc(sizeof(acct_txn_rec_t)); + + list_append(txn_list, txn); + + txn->action = atoi(row[TXN_REQ_ACTION]); + txn->actor_name = xstrdup(row[TXN_REQ_ACTOR]); + txn->id = atoi(row[TXN_REQ_ID]); + txn->set_info = xstrdup(row[TXN_REQ_INFO]); + txn->timestamp = atoi(row[TXN_REQ_TS]); + txn->where_query = xstrdup(row[TXN_REQ_NAME]); + } + mysql_free_result(result); + + return txn_list; +#else + return NULL; +#endif +} + extern int acct_storage_p_get_usage(mysql_conn_t 
*mysql_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) @@ -5066,9 +6079,9 @@ extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, tmp, my_usage_table, assoc_table, assoc_table, end, start, acct_assoc->id); xfree(tmp); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5141,7 +6154,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, xfree(tmp); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5163,7 +6176,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, mysql_free_result(result); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5316,7 +6329,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, if(query) { debug3("%s", query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } return rc; @@ -5339,6 +6352,11 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; + if(!node_ptr) { + error("No node_ptr given!"); + return SLURM_ERROR; + } + if (slurmctld_conf.fast_schedule && !slurmdbd_conf) cpus = node_ptr->config_ptr->cpus; else @@ -5361,7 +6379,7 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn, "values ('%s', '%s', %u, %d, '%s');", event_table, node_ptr->name, cluster, cpus, event_time, my_reason); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -5385,7 +6403,7 @@ extern int 
clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn, "update %s set period_end=%d where cluster='%s' " "and period_end=0 and node_name='%s';", event_table, event_time, cluster, node_ptr->name); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; #else @@ -5419,7 +6437,7 @@ extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn, "and period_end=0 and node_name=''", event_table, cluster); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5443,16 +6461,16 @@ extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn, "update %s set period_end=%d where cluster='%s' " "and period_end=0 and node_name=''", event_table, event_time, cluster); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) goto end_it; add_it: query = xstrdup_printf( - "insert into %s (cluster, cpu_count, period_start) " - "values ('%s', %u, %d)", + "insert into %s (cluster, cpu_count, period_start, reason) " + "values ('%s', %u, %d, 'Cluster processor count')", event_table, cluster, procs, event_time); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); end_it: @@ -5576,9 +6594,9 @@ extern int clusteracct_storage_p_get_usage( tmp, my_usage_table, end, start, cluster_rec->name); xfree(tmp); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5701,14 +6719,14 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, try_again: if(!(job_ptr->db_index = mysql_insert_ret_id( - mysql_conn->acct_mysql_db, 
query))) { + mysql_conn->db_conn, query))) { if(!reinit) { error("It looks like the storage has gone " "away trying to reconnect"); mysql_close_db_connection( - &mysql_conn->acct_mysql_db); + &mysql_conn->db_conn); mysql_get_db_connection( - &mysql_conn->acct_mysql_db, + &mysql_conn->db_conn, mysql_db_name, mysql_db_info); reinit = 1; goto try_again; @@ -5726,7 +6744,7 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, job_ptr->job_state & (~JOB_COMPLETING), job_ptr->total_procs, nodes, job_ptr->account, job_ptr->db_index); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); } xfree(block_id); @@ -5771,7 +6789,7 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, nodes = "(null)"; if(!job_ptr->db_index) { - job_ptr->db_index = _get_db_index(mysql_conn->acct_mysql_db, + job_ptr->db_index = _get_db_index(mysql_conn->db_conn, job_ptr->details->submit_time, job_ptr->job_id, job_ptr->assoc_id); @@ -5788,7 +6806,7 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, job_ptr->job_state & (~JOB_COMPLETING), nodes, job_ptr->exit_code, job_ptr->requid, job_ptr->db_index); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -5858,7 +6876,7 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn, if(!step_ptr->job_ptr->db_index) { step_ptr->job_ptr->db_index = - _get_db_index(mysql_conn->acct_mysql_db, + _get_db_index(mysql_conn->db_conn, step_ptr->job_ptr->details->submit_time, step_ptr->job_ptr->job_id, step_ptr->job_ptr->assoc_id); @@ -5876,8 +6894,8 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn, step_ptr->step_id, (int)step_ptr->start_time, step_ptr->name, JOB_RUNNING, cpus, node_list, cpus, JOB_RUNNING); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", 
mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -5965,7 +6983,7 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, if(!step_ptr->job_ptr->db_index) { step_ptr->job_ptr->db_index = - _get_db_index(mysql_conn->acct_mysql_db, + _get_db_index(mysql_conn->db_conn, step_ptr->job_ptr->details->submit_time, step_ptr->job_ptr->job_id, step_ptr->job_ptr->assoc_id); @@ -6016,7 +7034,7 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, jobacct->min_cpu_id.nodeid, /* min cpu node */ ave_cpu, /* ave cpu */ step_ptr->job_ptr->db_index, step_ptr->step_id); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -6039,7 +7057,7 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; if(!job_ptr->db_index) { - job_ptr->db_index = _get_db_index(mysql_conn->acct_mysql_db, + job_ptr->db_index = _get_db_index(mysql_conn->db_conn, job_ptr->details->submit_time, job_ptr->job_id, job_ptr->assoc_id); @@ -6067,9 +7085,9 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, "update %s set end=%d where id=%u && end=0;", suspend_table, (int)job_ptr->suspend_time, job_ptr->db_index); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_ERROR) { @@ -6078,7 +7096,7 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, "state=%d where id=%u and end=0", step_table, (int)job_ptr->suspend_time, job_ptr->job_state, job_ptr->db_index); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } @@ -6101,30 +7119,34 @@ extern List 
jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn, List job_list = NULL; #ifdef HAVE_MYSQL acct_job_cond_t job_cond; - struct passwd *pw = NULL; if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; memset(&job_cond, 0, sizeof(acct_job_cond_t)); + job_cond.acct_list = selected_steps; job_cond.step_list = selected_steps; job_cond.partition_list = selected_parts; - if(params->opt_cluster) { - job_cond.cluster_list = list_create(NULL); - list_append(job_cond.cluster_list, params->opt_cluster); - } + job_cond.cluster_list = params->opt_cluster_list; + + if (params->opt_uid >=0) { + char *temp = xstrdup_printf("%u", params->opt_uid); + job_cond.userid_list = list_create(NULL); + list_append(job_cond.userid_list, temp); + } - if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) { - job_cond.user_list = list_create(NULL); - list_append(job_cond.user_list, pw->pw_name); + if (params->opt_gid >=0) { + char *temp = xstrdup_printf("%u", params->opt_gid); + job_cond.groupid_list = list_create(NULL); + list_append(job_cond.groupid_list, temp); } job_list = mysql_jobacct_process_get_jobs(mysql_conn, &job_cond); - if(job_cond.user_list) - list_destroy(job_cond.user_list); - if(job_cond.cluster_list) - list_destroy(job_cond.cluster_list); + if(job_cond.userid_list) + list_destroy(job_cond.userid_list); + if(job_cond.groupid_list) + list_destroy(job_cond.groupid_list); #endif return job_list; @@ -6190,11 +7212,11 @@ extern int acct_storage_p_flush_jobs_on_cluster( * the suspend table and the step table */ query = xstrdup_printf("select t1.id, t1.state from %s as t1, %s as t2 " - "where t2.id=t1.associd and t2.cluster='%s' " - "&& t1.end=0;", + "where ((t2.id=t1.associd and t2.cluster='%s') " + "|| !t1.associd) && t1.end=0;", job_table, assoc_table, cluster); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -6244,9 +7266,10 @@ extern int 
acct_storage_p_flush_jobs_on_cluster( /* job_table, assoc_table, JOB_CANCELLED, */ /* event_time, cluster); */ if(query) { - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } #endif diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c index 7d669562f..094af22df 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c @@ -60,7 +60,7 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, char *table_level="t2"; MYSQL_RES *result = NULL, *step_result = NULL; MYSQL_ROW row, step_row; - int i; + int i, last_id = -1, curr_id = -1; jobacct_job_rec_t *job = NULL; jobacct_step_rec_t *step = NULL; time_t now = time(NULL); @@ -237,6 +237,24 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } + if(job_cond->userid_list && list_count(job_cond->userid_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + + itr = list_iterator_create(job_cond->userid_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "t1.uid='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(job_cond->groupid_list && list_count(job_cond->groupid_list)) { set = 0; if(extra) @@ -302,48 +320,45 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, job_cond->usage_end, job_cond->usage_start); } - /* we need to put all the associations (t2) stuff together here */ - if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { + if(job_cond->state_list && list_count(job_cond->state_list)) { set = 0; if(extra) xstrcat(extra, " && ("); else xstrcat(extra, 
" where ("); - itr = list_iterator_create(job_cond->cluster_list); + itr = list_iterator_create(job_cond->state_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.cluster='%s'", - table_level, object); + xstrfmtcat(extra, "t1.state='%s'", object); set = 1; } list_iterator_destroy(itr); - /* just incase the association is gone */ - if(set) - xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.cluster is null)", table_level); + xstrcat(extra, ")"); } - if(job_cond->user_list && list_count(job_cond->user_list)) { + /* we need to put all the associations (t2) stuff together here */ + if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { set = 0; if(extra) xstrcat(extra, " && ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->user_list); + itr = list_iterator_create(job_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.user='%s'", table_level, object); + xstrfmtcat(extra, "%s.cluster='%s'", + table_level, object); set = 1; } list_iterator_destroy(itr); /* just incase the association is gone */ if(set) xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.user is null)", table_level); + xstrfmtcat(extra, "%s.cluster is null)", table_level); } no_cond: @@ -362,10 +377,15 @@ no_cond: xstrcat(query, extra); xfree(extra); } - + /* Here we want to order them this way in such a way so it is + easy to look for duplicates + */ + if(job_cond && !job_cond->duplicates) + xstrcat(query, " order by jobid, submit desc"); + debug3("%d query\n%s", mysql_conn->conn, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); list_destroy(job_list); return NULL; @@ -374,7 +394,14 @@ no_cond: while((row = mysql_fetch_row(result))) { char *id = row[JOB_REQ_ID]; + + curr_id = atoi(row[JOB_REQ_JOBID]); + + if(job_cond && !job_cond->duplicates && curr_id == last_id) + continue; + 
last_id = curr_id; + job = create_jobacct_job_rec(); job->alloc_cpus = atoi(row[JOB_REQ_ALLOC_CPUS]); @@ -400,7 +427,7 @@ no_cond: job->submit = atoi(row[JOB_REQ_SUBMIT]); job->start = atoi(row[JOB_REQ_START]); job->end = atoi(row[JOB_REQ_END]); - if(job_cond->usage_start) { + if(job_cond && job_cond->usage_start) { if(job->start && (job->start < job_cond->usage_start)) job->start = job_cond->usage_start; @@ -424,11 +451,11 @@ no_cond: suspend_table, job_cond->usage_end, job_cond->usage_start, - row[JOB_REQ_ID]); + id); debug4("%d query\n%s", mysql_conn->conn, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, query, 0))) { list_destroy(job_list); job_list = NULL; @@ -470,7 +497,7 @@ no_cond: job->elapsed -= job->suspended; } - job->jobid = atoi(row[JOB_REQ_JOBID]); + job->jobid = curr_id; job->jobname = xstrdup(row[JOB_REQ_NAME]); job->gid = atoi(row[JOB_REQ_GID]); job->exitcode = atoi(row[JOB_REQ_COMP_CODE]); @@ -534,7 +561,7 @@ no_cond: //info("query = %s", query); if(!(step_result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); list_destroy(job_list); return NULL; diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h index c04fee6c4..983d6db2f 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h @@ -53,16 +53,7 @@ #include "src/database/mysql_common.h" #include "src/common/slurm_accounting_storage.h" -#ifndef HAVE_MYSQL -typedef void mysql_conn_t; -#else - -typedef struct { - MYSQL *acct_mysql_db; - bool rollback; - List update_list; - int conn; -} mysql_conn_t; +#ifdef HAVE_MYSQL //extern int acct_db_init; diff --git a/src/plugins/accounting_storage/mysql/mysql_rollup.c b/src/plugins/accounting_storage/mysql/mysql_rollup.c index 63e02bbce..4c0166589 100644 --- 
a/src/plugins/accounting_storage/mysql/mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/mysql_rollup.c @@ -189,7 +189,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -290,7 +290,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -344,7 +344,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, debug4("%d query\n%s", mysql_conn->conn, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; @@ -510,7 +510,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, "over_cpu_secs=VALUES(over_cpu_secs), " "resv_cpu_secs=VALUES(resv_cpu_secs)", now); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add cluster hour rollup"); @@ -548,7 +548,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, now); debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add assoc hour rollup"); @@ -633,7 +633,7 @@ extern int mysql_daily_rollup(mysql_conn_t *mysql_conn, cluster_hour_table, curr_end, curr_start, now); debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); @@ -658,7 +658,7 @@ extern int 
mysql_daily_rollup(mysql_conn_t *mysql_conn, */ query = xstrdup_printf("delete from %s where end < %d && end != 0", suspend_table, start); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't remove old suspend data"); @@ -729,7 +729,7 @@ extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, cluster_day_table, curr_end, curr_start, now); debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); @@ -756,7 +756,7 @@ extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, query = xstrdup_printf("delete from %s where period_end < %d " "&& period_end != 0", event_table, start); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't remove old event data"); diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c index 33d85ef18..2cd7c7df9 100644 --- a/src/plugins/accounting_storage/none/accounting_storage_none.c +++ b/src/plugins/accounting_storage/none/accounting_storage_none.c @@ -132,6 +132,12 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, acct_user_cond_t *user_q, acct_user_rec_t *user) @@ -191,6 +197,12 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(void 
*db_conn, acct_user_cond_t *user_q) { @@ -215,6 +227,18 @@ extern List acct_storage_p_get_associations(void *db_conn, return NULL; } +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + return NULL; +} + extern int acct_storage_p_get_usage(void *db_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) diff --git a/src/plugins/accounting_storage/pgsql/Makefile.am b/src/plugins/accounting_storage/pgsql/Makefile.am index 3c0a2833a..c0c2f0637 100644 --- a/src/plugins/accounting_storage/pgsql/Makefile.am +++ b/src/plugins/accounting_storage/pgsql/Makefile.am @@ -15,6 +15,7 @@ accounting_storage_pgsql_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) accounting_storage_pgsql_la_CFLAGS = $(PGSQL_CFLAGS) accounting_storage_pgsql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_pgsql.la $(PGSQL_LIBS) -accounting_storage_pgsql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_pgsql.la +force: +$(accounting_storage_pgsql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/plugins/accounting_storage/pgsql/Makefile.in b/src/plugins/accounting_storage/pgsql/Makefile.in index c7c14a438..7bb7cda71 100644 --- a/src/plugins/accounting_storage/pgsql/Makefile.in +++ b/src/plugins/accounting_storage/pgsql/Makefile.in @@ -76,6 +76,9 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) am__DEPENDENCIES_1 = +accounting_storage_pgsql_la_DEPENDENCIES = \ + $(top_builddir)/src/database/libslurm_pgsql.la \ + $(am__DEPENDENCIES_1) am_accounting_storage_pgsql_la_OBJECTS = \ accounting_storage_pgsql_la-accounting_storage_pgsql.lo \ accounting_storage_pgsql_la-pgsql_jobacct_process.lo @@ -283,9 +286,6 @@ accounting_storage_pgsql_la_CFLAGS = $(PGSQL_CFLAGS) accounting_storage_pgsql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_pgsql.la 
$(PGSQL_LIBS) -accounting_storage_pgsql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_pgsql.la - all: all-am .SUFFIXES: @@ -578,6 +578,10 @@ uninstall-am: uninstall-pkglibLTLIBRARIES mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + +force: +$(accounting_storage_pgsql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c index 70f748e66..41d5987ec 100644 --- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c @@ -93,6 +93,7 @@ char *cluster_month_table = "cluster_month_usage_table"; char *cluster_table = "cluster_table"; char *event_table = "cluster_event_table"; char *job_table = "job_table"; +char *qos_table = "qos_table"; char *step_table = "step_table"; char *txn_table = "txn_table"; char *user_table = "user_table"; @@ -111,7 +112,7 @@ static int _get_db_index(PGconn *acct_pgsql_db, if(!(result = pgsql_db_query_ret(acct_pgsql_db, query))) { xfree(query); return -1; - } + } xfree(query); @@ -149,6 +150,8 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, char *user) { storage_field_t acct_coord_table_fields[] = { + { "creation_time", "bigint not null" }, + { "mod_time", "bigint default 0" }, { "deleted", "smallint default 0" }, { "acct", "text not null" }, { "user_name", "text not null" }, @@ -162,7 +165,7 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, { "name", "text not null" }, { "description", "text not null" }, { "organization", "text not null" }, - { "qos", "smallint default 1 not null" }, + { "qos", "text not null" }, { NULL, NULL} }; @@ -265,6 +268,16 @@ static int 
_pgsql_acct_check_tables(PGconn *acct_pgsql_db, { NULL, NULL} }; + storage_field_t qos_table_fields[] = { + { "creation_time", "bigint not null" }, + { "mod_time", "bigint default 0" }, + { "deleted", "smallint default 0" }, + { "id", "serial" }, + { "name", "text not null" }, + { "description", "text" }, + { NULL, NULL} + }; + storage_field_t step_table_fields[] = { { "id", "int not null" }, { "stepid", "smallint not null" }, @@ -330,7 +343,7 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, }; int i = 0, job_found = 0; - int step_found = 0, txn_found = 0, event_found = 0; + int step_found = 0, txn_found = 0, event_found = 0, qos_found = 0; int user_found = 0, acct_found = 0, acct_coord_found = 0; int cluster_found = 0, cluster_hour_found = 0, cluster_day_found = 0, cluster_month_found = 0; @@ -390,6 +403,9 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, else if(!last_ran_found && !strcmp(last_ran_table, PQgetvalue(result, i, 0))) last_ran_found = 1; + else if(!qos_found && + !strcmp(qos_table, PQgetvalue(result, i, 0))) + qos_found = 1; else if(!step_found && !strcmp(step_table, PQgetvalue(result, i, 0))) step_found = 1; @@ -589,6 +605,20 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, return SLURM_ERROR; } + if(!qos_found) { + if(pgsql_db_create_table(acct_pgsql_db, + qos_table, qos_table_fields, + ", unique (name))") + == SLURM_ERROR) + return SLURM_ERROR; + + } else { /* table already exists: bring the qos schema up to date */ + if(pgsql_db_make_table_current(acct_pgsql_db, + qos_table, + qos_table_fields)) + return SLURM_ERROR; + } + if(!step_found) { if(pgsql_db_create_table(acct_pgsql_db, step_table, step_table_fields, @@ -764,7 +794,7 @@ extern int acct_storage_p_add_users(PGconn *acct_pgsql_db, uint32_t uid, } extern int acct_storage_p_add_coord(PGconn *acct_pgsql_db, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { return SLURM_SUCCESS; } @@ -787,87 +817,111 @@ extern int acct_storage_p_add_associations(PGconn
*acct_pgsql_db, uint32_t uid, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(PGconn *acct_pgsql_db, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(PGconn *acct_pgsql_db, uint32_t uid, - acct_user_cond_t *user_q, - acct_user_rec_t *user) + acct_user_cond_t *user_cond, + acct_user_rec_t *user) { return SLURM_SUCCESS; } extern List acct_storage_p_modify_accounts(PGconn *acct_pgsql_db, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct) { return SLURM_SUCCESS; } extern List acct_storage_p_modify_clusters(PGconn *acct_pgsql_db, uint32_t uid, - acct_cluster_cond_t *cluster_q, - acct_cluster_rec_t *cluster) + acct_cluster_cond_t *cluster_cond, + acct_cluster_rec_t *cluster) { return SLURM_SUCCESS; } -extern List acct_storage_p_modify_associations(PGconn *acct_pgsql_db, - uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_p_modify_associations( + PGconn *acct_pgsql_db, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_users(PGconn *acct_pgsql_db, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_coord(PGconn *acct_pgsql_db, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_accts(PGconn *acct_pgsql_db, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_clusters(PGconn *acct_pgsql_db, uint32_t uid, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { return SLURM_SUCCESS; } -extern List acct_storage_p_remove_associations(PGconn *acct_pgsql_db, - uint32_t uid, - acct_association_cond_t *assoc_q) +extern List 
acct_storage_p_remove_associations( + PGconn *acct_pgsql_db, uint32_t uid, + acct_association_cond_t *assoc_cond) { return SLURM_SUCCESS; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(PGconn *acct_pgsql_db, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { return NULL; } extern List acct_storage_p_get_accts(PGconn *acct_pgsql_db, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { return NULL; } extern List acct_storage_p_get_clusters(PGconn *acct_pgsql_db, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { return NULL; } extern List acct_storage_p_get_associations(PGconn *acct_pgsql_db, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(PGconn *acct_pgsql_db, + acct_txn_cond_t *txn_cond) { return NULL; } @@ -1018,8 +1072,8 @@ extern int clusteracct_storage_p_cluster_procs(PGconn *acct_pgsql_db, goto end_it; add_it: query = xstrdup_printf( - "insert into %s (cluster, cpu_count, period_start) " - "values ('%s', %u, %d)", + "insert into %s (cluster, cpu_count, period_start, reason) " + "values ('%s', %u, %d, 'Cluster processor count')", event_table, cluster, procs, event_time); rc = pgsql_db_query(acct_pgsql_db, query); xfree(query); @@ -1516,7 +1570,6 @@ extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, List job_list = NULL; #ifdef HAVE_PGSQL acct_job_cond_t job_cond; - struct passwd *pw = NULL; if(!acct_pgsql_db || PQstatus(acct_pgsql_db) != CONNECTION_OK) { if(!pgsql_get_db_connection(&acct_pgsql_db, @@ -1526,24 +1579,30 @@ extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, memset(&job_cond, 0, sizeof(acct_job_cond_t)); + job_cond.acct_list = selected_steps; job_cond.step_list = selected_steps; 
job_cond.partition_list = selected_parts; - if(params->opt_cluster) { - job_cond.cluster_list = list_create(NULL); - list_append(job_cond.cluster_list, params->opt_cluster); - } + job_cond.cluster_list = params->opt_cluster_list; - if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) { - job_cond.user_list = list_create(NULL); - list_append(job_cond.user_list, pw->pw_name); + if (params->opt_uid >=0) { + char *temp = xstrdup_printf("%u", params->opt_uid); + job_cond.userid_list = list_create(NULL); + list_append(job_cond.userid_list, temp); + } + + if (params->opt_gid >=0) { + char *temp = xstrdup_printf("%u", params->opt_gid); + job_cond.groupid_list = list_create(NULL); + list_append(job_cond.groupid_list, temp); } job_list = pgsql_jobacct_process_get_jobs(acct_pgsql_db, &job_cond); - if(job_cond.user_list) - list_destroy(job_cond.user_list); - if(job_cond.cluster_list) - list_destroy(job_cond.cluster_list); + if(job_cond.userid_list) + list_destroy(job_cond.userid_list); + if(job_cond.groupid_list) + list_destroy(job_cond.groupid_list); + #endif return job_list; } diff --git a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c index 94e97377b..a534e4d80 100644 --- a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c +++ b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c @@ -58,7 +58,7 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, int set = 0; char *table_level="t2"; PGresult *result = NULL, *step_result = NULL; - int i, j; + int i, j, last_id = -1, curr_id = -1; jobacct_job_rec_t *job = NULL; jobacct_step_rec_t *step = NULL; time_t now = time(NULL); @@ -252,6 +252,24 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, xstrcat(extra, ")"); } + if(job_cond->userid_list && list_count(job_cond->userid_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + + itr = 
list_iterator_create(job_cond->userid_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "t1.uid='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(job_cond->partition_list && list_count(job_cond->partition_list)) { set = 0; if(extra) @@ -300,41 +318,37 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, job_cond->usage_end, job_cond->usage_start); } - /* we need to put all the associations (t2) stuff together here */ - if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { + if(job_cond->state_list && list_count(job_cond->state_list)) { set = 0; if(extra) - xstrcat(extra, " and ("); + xstrcat(extra, " && ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->cluster_list); + itr = list_iterator_create(job_cond->state_list); while((object = list_next(itr))) { if(set) - xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.cluster='%s'", - table_level, object); + xstrcat(extra, " || "); + xstrfmtcat(extra, "t1.state='%s'", object); set = 1; } list_iterator_destroy(itr); - /* just incase the association is gone */ - if(set) - xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.cluster is null)", table_level); + xstrcat(extra, ")"); } - if(job_cond->user_list && list_count(job_cond->user_list)) { + /* we need to put all the associations (t2) stuff together here */ + if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { set = 0; if(extra) xstrcat(extra, " and ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->user_list); + itr = list_iterator_create(job_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.user_name='%s'", + xstrfmtcat(extra, "%s.cluster='%s'", table_level, object); set = 1; } @@ -342,7 +356,7 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, /* just incase the association is gone */ if(set) xstrcat(extra, " 
or "); - xstrfmtcat(extra, "%s.user_name is null)", table_level); + xstrfmtcat(extra, "%s.cluster is null)", table_level); } no_cond: @@ -361,6 +375,11 @@ no_cond: xstrcat(query, extra); xfree(extra); } + /* Here we want to order them this way in such a way so it is + easy to look for duplicates + */ + if(job_cond && !job_cond->duplicates) + xstrcat(query, " order by jobid, submit desc"); debug3("query\n%s", query); if(!(result = pgsql_db_query_ret(acct_pgsql_db, query))) { @@ -373,6 +392,13 @@ no_cond: for (i = 0; i < PQntuples(result); i++) { char *id = PQgetvalue(result, i, JOB_REQ_ID); + curr_id = atoi(PQgetvalue(result, i, JOB_REQ_JOBID)); + + if(job_cond && !job_cond->duplicates && curr_id == last_id) + continue; + + last_id = curr_id; + job = create_jobacct_job_rec(); job->alloc_cpus = atoi(PQgetvalue(result, i, @@ -411,7 +437,76 @@ no_cond: } job->elapsed -= job->suspended; - job->jobid = atoi(PQgetvalue(result, i, JOB_REQ_JOBID)); + if(job_cond && job_cond->usage_start) { + if(job->start && (job->start < job_cond->usage_start)) + job->start = job_cond->usage_start; + + if(!job->start && job->end) + job->start = job->end; + + if(!job->end || job->end > job_cond->usage_end) + job->end = job_cond->usage_end; + + job->elapsed = job->end - job->start; + + if(atoi(PQgetvalue(result, i, JOB_REQ_SUSPENDED))) { + PGresult *result2 = NULL; + int i2=0; + /* get the suspended time for this job */ + query = xstrdup_printf( + "select start, end from %s where " + "(start < %d && (end >= %d " + "|| end = 0)) && id=%s " + "order by start", + suspend_table, + job_cond->usage_end, + job_cond->usage_start, + id); + + debug4("query\n%s", query); + result2 = pgsql_db_query_ret(acct_pgsql_db, query); + xfree(query); /* release before the error check so the failure path cannot leak it */ + if(!result2) { + list_destroy(job_list); + job_list = NULL; + break; + } + for (i2 = 0; i2 < PQntuples(result2); i2++) { + int local_start = + atoi(PQgetvalue(result2, i2, 0)); /* column 0 of the suspend row, not the job row */ + int local_end = + atoi(PQgetvalue(result2, i2, 1)); /* column 1 of the suspend row */ + + if(!local_start) + continue; + +
if(job->start > local_start) + local_start = job->start; + if(job->end < local_end) + local_end = job->end; + + if((local_end - local_start) < 1) + continue; + + job->elapsed -= + (local_end - local_start); + job->suspended += + (local_end - local_start); + } + PQclear(result2); + } + } else { + job->suspended = + atoi(PQgetvalue(result, i, JOB_REQ_SUSPENDED)); + if(!job->end) { + job->elapsed = now - job->start; + } else { + job->elapsed = job->end - job->start; + } + job->elapsed -= job->suspended; + } + + job->jobid = curr_id; job->jobname = xstrdup(PQgetvalue(result, i, JOB_REQ_NAME)); job->gid = atoi(PQgetvalue(result, i, JOB_REQ_GID)); job->exitcode = atoi(PQgetvalue(result, i, JOB_REQ_COMP_CODE)); diff --git a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h index 19575d88b..be5ff81c4 100644 --- a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h +++ b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h @@ -58,6 +58,7 @@ extern char *assoc_table; extern char *job_table; extern char *step_table; +extern char *suspend_table; extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, acct_job_cond_t *job_cond); diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index 9d602efaf..cda102757 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -183,14 +183,14 @@ extern int acct_storage_p_add_users(void *db_conn, uint32_t uid, List user_list) } extern int acct_storage_p_add_coord(void *db_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { slurmdbd_msg_t req; dbd_acct_coord_msg_t get_msg; int rc, resp_code; get_msg.acct_list = acct_list; - get_msg.cond = user_q; + get_msg.cond = user_cond; 
req.msg_type = DBD_ADD_ACCOUNT_COORDS; req.data = &get_msg; @@ -220,7 +220,8 @@ extern int acct_storage_p_add_accts(void *db_conn, uint32_t uid, List acct_list) return rc; } -extern int acct_storage_p_add_clusters(void *db_conn, uint32_t uid, List cluster_list) +extern int acct_storage_p_add_clusters(void *db_conn, uint32_t uid, + List cluster_list) { slurmdbd_msg_t req; dbd_list_msg_t get_msg; @@ -258,9 +259,28 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, return rc; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + slurmdbd_msg_t req; + dbd_list_msg_t get_msg; + int rc, resp_code; + + get_msg.my_list = qos_list; + + req.msg_type = DBD_ADD_QOS; + req.data = &get_msg; + rc = slurm_send_slurmdbd_recv_rc_msg(&req, &resp_code); + + if(resp_code != SLURM_SUCCESS) + rc = resp_code; + + return rc; +} + extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, - acct_user_rec_t *user) + acct_user_cond_t *user_cond, + acct_user_rec_t *user) { slurmdbd_msg_t req, resp; dbd_modify_msg_t get_msg; @@ -268,7 +288,7 @@ extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, List ret_list = NULL; int rc; - get_msg.cond = user_q; + get_msg.cond = user_cond; get_msg.rec = user; req.msg_type = DBD_MODIFY_USERS; @@ -299,7 +319,7 @@ extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, } extern List acct_storage_p_modify_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct) { slurmdbd_msg_t req, resp; @@ -308,7 +328,7 @@ extern List acct_storage_p_modify_accounts(void *db_conn, uint32_t uid, int rc; List ret_list = NULL; - get_msg.cond = acct_q; + get_msg.cond = acct_cond; get_msg.rec = acct; req.msg_type = DBD_MODIFY_ACCOUNTS; @@ -339,8 +359,8 @@ extern List acct_storage_p_modify_accounts(void *db_conn, uint32_t uid, } extern List acct_storage_p_modify_clusters(void *db_conn, 
uint32_t uid, - acct_cluster_cond_t *cluster_q, - acct_cluster_rec_t *cluster) + acct_cluster_cond_t *cluster_cond, + acct_cluster_rec_t *cluster) { slurmdbd_msg_t req; dbd_modify_msg_t get_msg; @@ -349,7 +369,7 @@ extern List acct_storage_p_modify_clusters(void *db_conn, uint32_t uid, dbd_list_msg_t *got_msg; List ret_list = NULL; - get_msg.cond = cluster_q; + get_msg.cond = cluster_cond; get_msg.rec = cluster; req.msg_type = DBD_MODIFY_CLUSTERS; @@ -380,9 +400,10 @@ extern List acct_storage_p_modify_clusters(void *db_conn, uint32_t uid, return ret_list; } -extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_p_modify_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { slurmdbd_msg_t req; dbd_modify_msg_t get_msg; @@ -392,7 +413,7 @@ extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = assoc_q; + get_msg.cond = assoc_cond; get_msg.rec = assoc; req.msg_type = DBD_MODIFY_ASSOCS; @@ -423,7 +444,7 @@ extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid, } extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -433,7 +454,7 @@ extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_REMOVE_USERS; req.data = &get_msg; @@ -464,7 +485,7 @@ extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid, extern List acct_storage_p_remove_coord(void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { slurmdbd_msg_t req; dbd_acct_coord_msg_t get_msg; @@ -475,7 +496,7 @@ extern List acct_storage_p_remove_coord(void *db_conn, uint32_t 
uid, get_msg.acct_list = acct_list; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_REMOVE_ACCOUNT_COORDS; req.data = &get_msg; @@ -505,7 +526,7 @@ extern List acct_storage_p_remove_coord(void *db_conn, uint32_t uid, } extern List acct_storage_p_remove_accts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -515,7 +536,7 @@ extern List acct_storage_p_remove_accts(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = acct_q; + get_msg.cond = acct_cond; req.msg_type = DBD_REMOVE_ACCOUNTS; req.data = &get_msg; @@ -545,7 +566,7 @@ extern List acct_storage_p_remove_accts(void *db_conn, uint32_t uid, } extern List acct_storage_p_remove_clusters(void *db_conn, uint32_t uid, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -555,7 +576,7 @@ extern List acct_storage_p_remove_clusters(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = cluster_q; + get_msg.cond = cluster_cond; req.msg_type = DBD_REMOVE_CLUSTERS; req.data = &get_msg; @@ -584,8 +605,9 @@ extern List acct_storage_p_remove_clusters(void *db_conn, uint32_t uid, return ret_list; } -extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_p_remove_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -595,7 +617,7 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = assoc_q; + get_msg.cond = assoc_cond; req.msg_type = DBD_REMOVE_ASSOCS; req.data = &get_msg; @@ -624,8 +646,49 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, return ret_list; } +extern List acct_storage_p_remove_qos( + void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + 
slurmdbd_msg_t req; + dbd_cond_msg_t get_msg; + int rc; + slurmdbd_msg_t resp; + dbd_list_msg_t *got_msg; + List ret_list = NULL; + + + get_msg.cond = qos_cond; + + req.msg_type = DBD_REMOVE_QOS; + req.data = &get_msg; + rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + + if (rc != SLURM_SUCCESS) + error("slurmdbd: DBD_REMOVE_QOS failure: %m"); + else if (resp.msg_type == DBD_RC) { + dbd_rc_msg_t *msg = resp.data; + if(msg->return_code == SLURM_SUCCESS) { + info("%s", msg->comment); + ret_list = list_create(NULL); + } else + error("%s", msg->comment); + slurmdbd_free_rc_msg(msg); + } else if (resp.msg_type != DBD_GOT_LIST) { + error("slurmdbd: response type not DBD_GOT_LIST: %u", + resp.msg_type); + } else { + got_msg = (dbd_list_msg_t *) resp.data; + ret_list = got_msg->my_list; + got_msg->my_list = NULL; + slurmdbd_free_list_msg(got_msg); + } + + return ret_list; +} + extern List acct_storage_p_get_users(void *db_conn, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; @@ -633,7 +696,7 @@ extern List acct_storage_p_get_users(void *db_conn, int rc; List ret_list = NULL; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_GET_USERS; req.data = &get_msg; @@ -655,7 +718,7 @@ extern List acct_storage_p_get_users(void *db_conn, } extern List acct_storage_p_get_accts(void *db_conn, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; @@ -663,7 +726,7 @@ extern List acct_storage_p_get_accts(void *db_conn, int rc; List ret_list = NULL; - get_msg.cond = acct_q; + get_msg.cond = acct_cond; req.msg_type = DBD_GET_ACCOUNTS; req.data = &get_msg; @@ -686,7 +749,7 @@ extern List acct_storage_p_get_accts(void *db_conn, } extern List acct_storage_p_get_clusters(void *db_conn, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; @@ -694,7 +757,7 @@ extern List 
acct_storage_p_get_clusters(void *db_conn, int rc; List ret_list = NULL; - get_msg.cond = cluster_q; + get_msg.cond = cluster_cond; req.msg_type = DBD_GET_CLUSTERS; req.data = &get_msg; @@ -717,16 +780,15 @@ extern List acct_storage_p_get_clusters(void *db_conn, } extern List acct_storage_p_get_associations(void *db_conn, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) { - slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; dbd_list_msg_t *got_msg; int rc; List ret_list = NULL; - get_msg.cond = assoc_q; + get_msg.cond = assoc_cond; req.msg_type = DBD_GET_ASSOCS; req.data = &get_msg; @@ -744,6 +806,72 @@ extern List acct_storage_p_get_associations(void *db_conn, slurmdbd_free_list_msg(got_msg); } + return ret_list; +} + +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + slurmdbd_msg_t req, resp; + dbd_cond_msg_t get_msg; + dbd_list_msg_t *got_msg; + int rc; + List ret_list = NULL; + + get_msg.cond = qos_cond; + + req.msg_type = DBD_GET_QOS; + req.data = &get_msg; + rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + + if (rc != SLURM_SUCCESS) + error("slurmdbd: DBD_GET_QOS failure: %m"); + else if (resp.msg_type != DBD_GOT_QOS) { + error("slurmdbd: response type not DBD_GOT_QOS: %u", + resp.msg_type); + } else { + got_msg = (dbd_list_msg_t *) resp.data; + /* do this just for this type since it could be called + * multiple times, and if we send back and empty list + * instead of no list we will only call this once. 
+ */ + if(!got_msg->my_list) + ret_list = list_create(NULL); + else + ret_list = got_msg->my_list; + got_msg->my_list = NULL; + slurmdbd_free_list_msg(got_msg); + } + + return ret_list; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + slurmdbd_msg_t req, resp; + dbd_cond_msg_t get_msg; + dbd_list_msg_t *got_msg; + int rc; + List ret_list = NULL; + + get_msg.cond = txn_cond; + + req.msg_type = DBD_GET_TXN; + req.data = &get_msg; + rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + + if (rc != SLURM_SUCCESS) + error("slurmdbd: DBD_GET_TXN failure: %m"); + else if (resp.msg_type != DBD_GOT_TXN) { + error("slurmdbd: response type not DBD_GOT_TXN: %u", + resp.msg_type); + } else { + got_msg = (dbd_list_msg_t *) resp.data; + ret_list = got_msg->my_list; + got_msg->my_list = NULL; + slurmdbd_free_list_msg(got_msg); + } return ret_list; } @@ -1231,7 +1359,12 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, get_msg.selected_steps = selected_steps; get_msg.selected_parts = selected_parts; - get_msg.cluster_name = params->opt_cluster; + if(params->opt_cluster_list && list_count(params->opt_cluster_list)) { + ListIterator itr = + list_iterator_create(params->opt_cluster_list); + get_msg.cluster_name = list_next(itr); + list_iterator_destroy(itr); + } get_msg.gid = params->opt_gid; if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) diff --git a/src/plugins/checkpoint/xlch/Makefile.am b/src/plugins/checkpoint/xlch/Makefile.am index 5bcc9bae2..93d497510 100644 --- a/src/plugins/checkpoint/xlch/Makefile.am +++ b/src/plugins/checkpoint/xlch/Makefile.am @@ -10,10 +10,6 @@ pkglib_LTLIBRARIES = checkpoint_xlch.la checkpoint_xlch_la_SOURCES = checkpoint_xlch.c config.c checkpoint_xlch_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la - -checkpoint_xlch_la_LIBADD = $(convenience_libs) - config.c: Makefile @( echo "char *scch_path = \"$(prefix)/sbin/scch\";"\ ) > config.c diff 
--git a/src/plugins/checkpoint/xlch/Makefile.in b/src/plugins/checkpoint/xlch/Makefile.in index bece591cd..a873989cc 100644 --- a/src/plugins/checkpoint/xlch/Makefile.in +++ b/src/plugins/checkpoint/xlch/Makefile.in @@ -75,7 +75,7 @@ am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; am__installdirs = "$(DESTDIR)$(pkglibdir)" pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) -checkpoint_xlch_la_DEPENDENCIES = $(convenience_libs) +checkpoint_xlch_la_LIBADD = am_checkpoint_xlch_la_OBJECTS = checkpoint_xlch.lo config.lo checkpoint_xlch_la_OBJECTS = $(am_checkpoint_xlch_la_OBJECTS) checkpoint_xlch_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ @@ -271,8 +271,6 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = checkpoint_xlch.la checkpoint_xlch_la_SOURCES = checkpoint_xlch.c config.c checkpoint_xlch_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la -checkpoint_xlch_la_LIBADD = $(convenience_libs) all: all-am .SUFFIXES: diff --git a/src/plugins/jobcomp/Makefile.am b/src/plugins/jobcomp/Makefile.am index 0f2d169d4..14b8f4a09 100644 --- a/src/plugins/jobcomp/Makefile.am +++ b/src/plugins/jobcomp/Makefile.am @@ -1,3 +1,3 @@ # Makefile for jobcomp plugins -SUBDIRS = filetxt none script mysql pgsql slurmdbd +SUBDIRS = filetxt none script mysql pgsql diff --git a/src/plugins/jobcomp/Makefile.in b/src/plugins/jobcomp/Makefile.in index c83e5dd7c..70c8f3dbe 100644 --- a/src/plugins/jobcomp/Makefile.in +++ b/src/plugins/jobcomp/Makefile.in @@ -247,7 +247,7 @@ target_os = @target_os@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = filetxt none script mysql pgsql slurmdbd +SUBDIRS = filetxt none script mysql pgsql all: all-recursive .SUFFIXES: diff --git a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c index dd882778d..cf22d42a7 100644 --- 
a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c +++ b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c @@ -188,7 +188,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, { char line[BUFFER_SIZE]; char *fptr = NULL; - char *jobid = NULL; + int jobid = 0; char *partition = NULL; FILE *fd = NULL; int lc = 0; @@ -207,7 +207,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, terminated strings */ if(job_info_list) list_destroy(job_info_list); - jobid = NULL; + jobid = 0; partition = NULL; job_info_list = list_create(_destroy_filetxt_jobcomp_info); while(fptr) { @@ -220,7 +220,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, jobcomp_info->val = fptr; fptr = strstr(fptr, " "); if(!strcasecmp("JobId", jobcomp_info->name)) - jobid = jobcomp_info->val; + jobid = atoi(jobcomp_info->val); else if(!strcasecmp("Partition", jobcomp_info->name)) partition = jobcomp_info->val; @@ -245,7 +245,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, continue; itr = list_iterator_create(selected_steps); while((selected_step = list_next(itr))) { - if (strcmp(selected_step->job, jobid)) + if (selected_step->jobid != jobid) /* skip selections for other jobs, as the old strcmp() test did */ continue; /* job matches */ list_iterator_destroy(itr); diff --git a/src/plugins/jobcomp/script/jobcomp_script.c b/src/plugins/jobcomp/script/jobcomp_script.c index 503da6ce6..8f6348a67 100644 --- a/src/plugins/jobcomp/script/jobcomp_script.c +++ b/src/plugins/jobcomp/script/jobcomp_script.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * jobcomp_script.c - Script running slurm job completion logging plugin.
- * $Id: jobcomp_script.c 14242 2008-06-11 23:29:49Z jette $ + * $Id: jobcomp_script.c 14500 2008-07-11 23:00:14Z jette $ ***************************************************************************** * Produced at Center for High Performance Computing, North Dakota State * University @@ -251,16 +251,15 @@ static char ** _extend_env (char ***envp) static int _env_append (char ***envp, const char *name, const char *val) { - char buf[4096]; - char *entry; + char *entry = NULL; char **ep; if (val == NULL) val = ""; - snprintf (buf, sizeof (buf) - 1, "%s=%s", name, val); + xstrfmtcat (entry, "%s=%s", name, val); - if (!(entry = xstrdup (buf))) + if (entry == NULL) return (-1); ep = _extend_env (envp); diff --git a/src/plugins/jobcomp/slurmdbd/Makefile.am b/src/plugins/jobcomp/slurmdbd/Makefile.am deleted file mode 100644 index 5ce733dbf..000000000 --- a/src/plugins/jobcomp/slurmdbd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Makefile for jobcomp/slurmdbd plugin - -AUTOMAKE_OPTIONS = foreign - -PLUGIN_FLAGS = -module -avoid-version --export-dynamic - -INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common - -pkglib_LTLIBRARIES = jobcomp_slurmdbd.la - -# Null job completion logging plugin. -jobcomp_slurmdbd_la_SOURCES = jobcomp_slurmdbd.c -jobcomp_slurmdbd_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/jobcomp/slurmdbd/Makefile.in b/src/plugins/jobcomp/slurmdbd/Makefile.in deleted file mode 100644 index 42f439efd..000000000 --- a/src/plugins/jobcomp/slurmdbd/Makefile.in +++ /dev/null @@ -1,555 +0,0 @@ -# Makefile.in generated by automake 1.10.1 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. 
- -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -# Makefile for jobcomp/slurmdbd plugin - -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -target_triplet = @target@ -subdir = src/plugins/jobcomp/slurmdbd -DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ - $(top_srcdir)/auxdir/slurm.m4 \ - $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ - $(top_srcdir)/auxdir/x_ac_affinity.m4 \ - $(top_srcdir)/auxdir/x_ac_aix.m4 \ - $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ - $(top_srcdir)/auxdir/x_ac_cflags.m4 \ - $(top_srcdir)/auxdir/x_ac_databases.m4 \ - $(top_srcdir)/auxdir/x_ac_debug.m4 \ - $(top_srcdir)/auxdir/x_ac_elan.m4 \ - $(top_srcdir)/auxdir/x_ac_federation.m4 \ - $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ - $(top_srcdir)/auxdir/x_ac_gtk.m4 \ - $(top_srcdir)/auxdir/x_ac_munge.m4 \ - $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ - $(top_srcdir)/auxdir/x_ac_pam.m4 \ - $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ - $(top_srcdir)/auxdir/x_ac_readline.m4 \ - $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ - $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ - $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ - $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ - $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac 
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h -CONFIG_CLEAN_FILES = -am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; -am__vpath_adj = case $$p in \ - $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ - *) f=$$p;; \ - esac; -am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; -am__installdirs = "$(DESTDIR)$(pkglibdir)" -pkglibLTLIBRARIES_INSTALL = $(INSTALL) -LTLIBRARIES = $(pkglib_LTLIBRARIES) -jobcomp_slurmdbd_la_LIBADD = -am_jobcomp_slurmdbd_la_OBJECTS = jobcomp_slurmdbd.lo -jobcomp_slurmdbd_la_OBJECTS = $(am_jobcomp_slurmdbd_la_OBJECTS) -jobcomp_slurmdbd_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(jobcomp_slurmdbd_la_LDFLAGS) $(LDFLAGS) -o $@ -DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm -depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp -am__depfiles_maybe = depfiles -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -SOURCES = $(jobcomp_slurmdbd_la_SOURCES) -DIST_SOURCES = $(jobcomp_slurmdbd_la_SOURCES) -ETAGS = etags -CTAGS = ctags -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -AMTAR = @AMTAR@ -AR = @AR@ -AUTHD_CFLAGS = @AUTHD_CFLAGS@ -AUTHD_LIBS = @AUTHD_LIBS@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -BG_INCLUDES = @BG_INCLUDES@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CMD_LDFLAGS = 
@CMD_LDFLAGS@ -CPP = @CPP@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -DSYMUTIL = @DSYMUTIL@ -ECHO = @ECHO@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -ELAN_LIBS = @ELAN_LIBS@ -EXEEXT = @EXEEXT@ -F77 = @F77@ -FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@ -FFLAGS = @FFLAGS@ -GREP = @GREP@ -GTK2_CFLAGS = @GTK2_CFLAGS@ -GTK2_LIBS = @GTK2_LIBS@ -HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ -HAVEPGCONFIG = @HAVEPGCONFIG@ -HAVEPKGCONFIG = @HAVEPKGCONFIG@ -HAVE_AIX = @HAVE_AIX@ -HAVE_ELAN = @HAVE_ELAN@ -HAVE_FEDERATION = @HAVE_FEDERATION@ -HAVE_OPENSSL = @HAVE_OPENSSL@ -HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LIBTOOL = @LIBTOOL@ -LIB_LDFLAGS = @LIB_LDFLAGS@ -LN_S = @LN_S@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ -MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ -MUNGE_LIBS = @MUNGE_LIBS@ -MYSQL_CFLAGS = @MYSQL_CFLAGS@ -MYSQL_LIBS = @MYSQL_LIBS@ -NCURSES = @NCURSES@ -NMEDIT = @NMEDIT@ -NUMA_LIBS = @NUMA_LIBS@ -OBJEXT = @OBJEXT@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PAM_LIBS = @PAM_LIBS@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PGSQL_CFLAGS = @PGSQL_CFLAGS@ -PGSQL_LIBS = @PGSQL_LIBS@ -PLPA_LIBS = @PLPA_LIBS@ -PROCTRACKDIR = @PROCTRACKDIR@ -PROJECT = @PROJECT@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -RANLIB = @RANLIB@ -READLINE_LIBS = @READLINE_LIBS@ -RELEASE = @RELEASE@ -SED = @SED@ -SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ -SEMAPHORE_SOURCES = 
@SEMAPHORE_SOURCES@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -SLURMCTLD_PORT = @SLURMCTLD_PORT@ -SLURMDBD_PORT = @SLURMDBD_PORT@ -SLURMD_PORT = @SLURMD_PORT@ -SLURM_API_AGE = @SLURM_API_AGE@ -SLURM_API_CURRENT = @SLURM_API_CURRENT@ -SLURM_API_MAJOR = @SLURM_API_MAJOR@ -SLURM_API_REVISION = @SLURM_API_REVISION@ -SLURM_API_VERSION = @SLURM_API_VERSION@ -SLURM_MAJOR = @SLURM_MAJOR@ -SLURM_MICRO = @SLURM_MICRO@ -SLURM_MINOR = @SLURM_MINOR@ -SLURM_VERSION = @SLURM_VERSION@ -SO_LDFLAGS = @SO_LDFLAGS@ -SSL_CPPFLAGS = @SSL_CPPFLAGS@ -SSL_LDFLAGS = @SSL_LDFLAGS@ -SSL_LIBS = @SSL_LIBS@ -STRIP = @STRIP@ -UTIL_LIBS = @UTIL_LIBS@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -ac_ct_F77 = @ac_ct_F77@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target = @target@ -target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = 
@target_vendor@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ -AUTOMAKE_OPTIONS = foreign -PLUGIN_FLAGS = -module -avoid-version --export-dynamic -INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -pkglib_LTLIBRARIES = jobcomp_slurmdbd.la - -# Null job completion logging plugin. -jobcomp_slurmdbd_la_SOURCES = jobcomp_slurmdbd.c -jobcomp_slurmdbd_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -all: all-am - -.SUFFIXES: -.SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ - && exit 0; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/jobcomp/slurmdbd/Makefile'; \ - cd $(top_srcdir) && \ - $(AUTOMAKE) --foreign src/plugins/jobcomp/slurmdbd/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' 
in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) - @$(NORMAL_INSTALL) - test -z "$(pkglibdir)" || $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" - @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ - if test -f $$p; then \ - f=$(am__strip_dir) \ - echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(pkglibdir)/$$f'"; \ - $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(pkglibdir)/$$f"; \ - else :; fi; \ - done - -uninstall-pkglibLTLIBRARIES: - @$(NORMAL_UNINSTALL) - @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ - p=$(am__strip_dir) \ - echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$p'"; \ - $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$p"; \ - done - -clean-pkglibLTLIBRARIES: - -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) - @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ - dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ - test "$$dir" != "$$p" || dir=.; \ - echo "rm -f \"$${dir}/so_locations\""; \ - rm -f "$${dir}/so_locations"; \ - done -jobcomp_slurmdbd.la: $(jobcomp_slurmdbd_la_OBJECTS) 
$(jobcomp_slurmdbd_la_DEPENDENCIES) - $(jobcomp_slurmdbd_la_LINK) -rpath $(pkglibdir) $(jobcomp_slurmdbd_la_OBJECTS) $(jobcomp_slurmdbd_la_LIBADD) $(LIBS) - -mostlyclean-compile: - -rm -f *.$(OBJEXT) - -distclean-compile: - -rm -f *.tab.c - -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jobcomp_slurmdbd.Plo@am__quote@ - -.c.o: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c $< - -.c.obj: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` - -.c.lo: -@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< - -mostlyclean-libtool: - -rm -f *.lo - -clean-libtool: - -rm -rf .libs _libs - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - 
tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$tags $$unique; \ - fi -ctags: CTAGS -CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$tags$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$tags $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && cd $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) $$here - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ - fi; \ - cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ 
- else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(LTLIBRARIES) -installdirs: - for dir in "$(DESTDIR)$(pkglibdir)"; do \ - test -z "$$dir" || $(MKDIR_P) "$$dir"; \ - done -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." 
-clean: clean-am - -clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ - mostlyclean-am - -distclean: distclean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags - -dvi: dvi-am - -dvi-am: - -html: html-am - -info: info-am - -info-am: - -install-data-am: - -install-dvi: install-dvi-am - -install-exec-am: install-pkglibLTLIBRARIES - -install-html: install-html-am - -install-info: install-info-am - -install-man: - -install-pdf: install-pdf-am - -install-ps: install-ps-am - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-compile mostlyclean-generic \ - mostlyclean-libtool - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: uninstall-pkglibLTLIBRARIES - -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ - clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ - distclean-compile distclean-generic distclean-libtool \ - distclean-tags distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-pdf install-pdf-am install-pkglibLTLIBRARIES \ - install-ps install-ps-am install-strip installcheck \ - installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c b/src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c deleted file mode 100644 index aceee6cc8..000000000 --- a/src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c +++ /dev/null @@ -1,139 +0,0 @@ -/*****************************************************************************\ - * jobcomp_slurmdbd.c - SlurmDBD slurm job completion plugin. - ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Danny Aubke <da@llnl.gov>. - * LLNL-CODE-402394. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do - * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in - * the program, then also delete it here. 
- * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -\*****************************************************************************/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#if HAVE_STDINT_H -# include <stdint.h> -#endif -#if HAVE_INTTYPES_H -# include <inttypes.h> -#endif - -#include <stdio.h> -#include <slurm/slurm_errno.h> - -#include "src/common/slurm_jobcomp.h" -#include "src/slurmctld/slurmctld.h" - -/* - * These variables are required by the generic plugin interface. If they - * are not found in the plugin, the plugin loader will ignore it. - * - * plugin_name - a string giving a human-readable description of the - * plugin. There is no maximum length, but the symbol must refer to - * a valid string. - * - * plugin_type - a string suggesting the type of the plugin or its - * applicability to a particular form of data or method of data handling. - * If the low-level plugin API is used, the contents of this string are - * unimportant and may be anything. SLURM uses the higher-level plugin - * interface which requires this string to be of the form - * - * <application>/<method> - * - * where <application> is a description of the intended application of - * the plugin (e.g., "jobcomp" for SLURM job completion logging) and <method> - * is a description of how this plugin satisfies that application. SLURM will - * only load job completion logging plugins if the plugin_type string has a - * prefix of "jobcomp/". - * - * plugin_version - an unsigned 32-bit integer giving the version number - * of the plugin. 
If major and minor revisions are desired, the major - * version number may be multiplied by a suitable magnitude constant such - * as 100 or 1000. Various SLURM versions will likely require a certain - * minimum versions for their plugins as the job completion logging API - * matures. - */ -const char plugin_name[] = "Job completion logging SLURMDBD plugin"; -const char plugin_type[] = "jobcomp/slurmdbd"; -const uint32_t plugin_version = 100; - -/* - * init() is called when the plugin is loaded, before any other functions - * are called. Put global initialization here. - */ -int init ( void ) -{ - return SLURM_SUCCESS; -} - -/* - * The remainder of this file implements the standard SLURM job completion - * logging API. - */ - -int slurm_jobcomp_set_location ( char * location ) -{ - return SLURM_SUCCESS; -} - -int slurm_jobcomp_log_record ( struct job_record *job_ptr ) -{ - return SLURM_SUCCESS; -} - -int slurm_jobcomp_get_errno( void ) -{ - return SLURM_SUCCESS; -} - -char *slurm_jobcomp_strerror( int errnum ) -{ - return NULL; -} - -List slurm_jobcomp_get_jobs(List selected_steps, List selected_parts, - void *params) -{ - return NULL; -} - -void slurm_jobcomp_archive(List selected_parts, - void *params) -{ - return; -} - -int fini ( void ) -{ - return SLURM_SUCCESS; -} - - diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index f817698b7..5e9c50a92 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -408,6 +408,20 @@ static void mvapich_poll_destroy (struct mvapich_poll *mp) xfree (mp); } + +/* + * Call poll(2) on mvapich_poll object, handling EAGAIN and EINTR errors. 
+ */ +static int mvapich_poll_internal (struct mvapich_poll *mp) +{ + int n; + while ((n = poll (mp->fds, mp->nfds, startup_timeout (mp->st))) < 0) { + if (errno != EINTR && errno != EAGAIN) + return (-1); + } + return (n); +} + /* * Poll for next available mvapich_info object with read/write activity * @@ -457,7 +471,7 @@ again: mvapich_debug3 ("mvapich_poll_next (nfds=%d, timeout=%d)\n", mp->nfds, startup_timeout (st)); - if ((rc = poll (mp->fds, mp->nfds, startup_timeout (st))) < 0) + if ((rc = mvapich_poll_internal (mp)) < 0) mvapich_terminate_job (st, "mvapich_poll_next: %m"); else if (rc == 0) { /* @@ -471,9 +485,9 @@ again: /* * Loop through poll fds and return first mvapich_info object - * we find that has the requested read/write activity. + * we find that has the requested read/write activity. * When found, we update the loop counter, and return - * the corresponding mvapich_info object. + * the corresponding mvapich_info object. * */ for (i = mp->counter; i < mp->nfds; i++) { @@ -786,7 +800,7 @@ static int mvapich_recv (mvapich_state_t *st, void* buf, int size, int rank) /* Scatter data in buf to ranks using chunks of size bytes */ static int mvapich_scatterbcast (mvapich_state_t *st, void* buf, int size) { - int rc; + int rc = 0; int n = 0; struct mvapich_poll *mp; struct mvapich_info *mvi; @@ -1284,8 +1298,9 @@ static int mvapich_abort_accept (mvapich_state_t *st) mvapich_abort_timeout ()); while ((rc = poll (pfds, 1, mvapich_abort_timeout ())) < 0) { - if (errno != EINTR) - return (-1); + if (errno == EINTR || errno == EAGAIN) + continue; + return (-1); } /* @@ -1689,11 +1704,13 @@ mvapich_initialize_connections (mvapich_state_t *st, mvapich_debug3 ("do_poll (nfds=%d)\n", nfds); - if ((rc = poll (fds, nfds, startup_timeout (st))) < 0) { + while ((rc = poll (fds, nfds, startup_timeout (st))) < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; error ("mvapich: poll: %m"); break; } - else if (rc == 0) { + if (rc == 0) { report_absent_tasks (st, 
1); mvapich_terminate_job (st, NULL); } diff --git a/src/plugins/select/bluegene/plugin/Makefile.am b/src/plugins/select/bluegene/plugin/Makefile.am index fde0b65dc..da526d6e7 100644 --- a/src/plugins/select/bluegene/plugin/Makefile.am +++ b/src/plugins/select/bluegene/plugin/Makefile.am @@ -47,3 +47,7 @@ slurm_epilog_SOURCES = slurm_epilog.c sfree_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS) slurm_prolog_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) slurm_epilog_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + +force: +$(select_bluegene_la_LIBADD) $(sfree_LDADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/plugins/select/bluegene/plugin/Makefile.in b/src/plugins/select/bluegene/plugin/Makefile.in index e21b8b361..3e66184e5 100644 --- a/src/plugins/select/bluegene/plugin/Makefile.in +++ b/src/plugins/select/bluegene/plugin/Makefile.in @@ -689,6 +689,10 @@ uninstall-am: uninstall-pkglibLTLIBRARIES uninstall-sbinPROGRAMS tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES \ uninstall-sbinPROGRAMS + +force: +$(select_bluegene_la_LIBADD) $(sfree_LDADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
.NOEXPORT: diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index 7a4fafe86..deafd2fd3 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -63,16 +63,22 @@ int compute_c_b_task_dist(struct select_cr_job *job) { int i, j, rc = SLURM_SUCCESS; + bool over_commit = false; bool over_subscribe = false; uint32_t taskid = 0, last_taskid, maxtasks = job->nprocs; + if (job->job_ptr->details && job->job_ptr->details->overcommit) + over_commit = true; + for (j = 0; (taskid < maxtasks); j++) { /* cycle counter */ bool space_remaining = false; last_taskid = taskid; for (i = 0; ((i < job->nhosts) && (taskid < maxtasks)); i++) { if ((j < job->cpus[i]) || over_subscribe) { taskid++; - job->alloc_cpus[i]++; + if ((job->alloc_cpus[i] == 0) || + (!over_commit)) + job->alloc_cpus[i]++; if ((j + 1) < job->cpus[i]) space_remaining = true; } @@ -508,6 +514,7 @@ extern int cr_plane_dist(struct select_cr_job *job, uint32_t taskcount = 0, last_taskcount; int job_index = -1; bool count_done = false; + bool over_commit = false; debug3("cons_res _cr_plane_dist plane_size %u ", plane_size); debug3("cons_res _cr_plane_dist maxtasks %u num_hosts %u", @@ -517,7 +524,10 @@ extern int cr_plane_dist(struct select_cr_job *job, error("Error in _cr_plane_dist"); return SLURM_ERROR; } - + + if (job->job_ptr->details && job->job_ptr->details->overcommit) + over_commit = true; + taskcount = 0; for (j=0; ((taskcount<maxtasks) && (!count_done)); j++) { last_taskcount = taskcount; @@ -530,7 +540,9 @@ extern int cr_plane_dist(struct select_cr_job *job, break; } taskcount++; - job->alloc_cpus[i]++; + if ((job->alloc_cpus[i] == 0) || + (!over_commit)) + job->alloc_cpus[i]++; } } if (last_taskcount == taskcount) { diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index e878ffe6f..43c5a66bd 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ 
b/src/plugins/select/cons_res/select_cons_res.c @@ -2,7 +2,7 @@ * select_cons_res.c - node selection plugin supporting consumable * resources policies. * - * $Id: select_cons_res.c 14297 2008-06-20 15:41:06Z jette $ + * $Id: select_cons_res.c 14469 2008-07-09 18:15:23Z jette $ *****************************************************************************\ * * The following example below illustrates how four jobs are allocated @@ -599,6 +599,17 @@ static uint16_t _get_task_count(struct node_cr_record *select_node_ptr, &threads, alloc_cores, cr_type, job_ptr->job_id, this_node->node_ptr->name); + + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + uint32_t free_mem, mem_per_cpu; + int max_cpus; + mem_per_cpu = job_ptr->details->job_min_memory & (~MEM_PER_CPU); + free_mem = this_node->real_memory - this_node->alloc_memory; + max_cpus = free_mem / mem_per_cpu; + /* info("cpus avail:%d mem for %d", numtasks, max_cpus); */ + numtasks = MIN(numtasks, max_cpus); + } + #if (CR_DEBUG) info("cons_res: _get_task_count computed a_tasks %d s %d c %d " "t %d on %s for job %d", @@ -1992,8 +2003,9 @@ static int _verify_node_state(struct node_cr_record *select_node_ptr, enum node_cr_state job_node_req) { int i; - uint32_t free_mem; + uint32_t free_mem, min_mem; + min_mem = job_ptr->details->job_min_memory & (~MEM_PER_CPU); for (i = 0; i < select_node_cnt; i++) { if (!bit_test(bitmap, i)) continue; @@ -2003,7 +2015,7 @@ static int _verify_node_state(struct node_cr_record *select_node_ptr, (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))) { free_mem = select_node_ptr[i].real_memory; free_mem -= select_node_ptr[i].alloc_memory; - if (free_mem < job_ptr->details->job_min_memory) + if (free_mem < min_mem) goto clear_bit; } @@ -2589,9 +2601,6 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, job->cpus[j] = 0; } job->alloc_cpus[j] = 0; - if ((cr_type == CR_CORE_MEMORY) || (cr_type == CR_CPU_MEMORY) || - (cr_type == CR_MEMORY) || (cr_type == 
CR_SOCKET_MEMORY)) - job->alloc_memory[j] = job_ptr->details->job_min_memory; if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)|| (cr_type == CR_SOCKET) || (cr_type == CR_SOCKET_MEMORY)) { _chk_resize_job(job, j, job->num_sockets[j]); @@ -2652,6 +2661,26 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, return error_code; } + if (job_ptr->details->job_min_memory && + ((cr_type == CR_CORE_MEMORY) || (cr_type == CR_CPU_MEMORY) || + (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))) { + j = 0; + for (i = 0; i < node_record_count; i++) { + if (bit_test(bitmap, i) == 0) + continue; + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job->alloc_memory[j] = job_ptr->details-> + job_min_memory & + (~MEM_PER_CPU); + job->alloc_memory[j] *= job->alloc_cpus[j]; + } else { + job->alloc_memory[j] = job_ptr->details-> + job_min_memory; + } + j++; + } + } + _append_to_job_list(job); last_cr_update_time = time(NULL); diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 28c8ddc8e..787580d21 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -2,8 +2,6 @@ * select_linear.c - node selection plugin for simple one-dimensional * address space. Selects nodes for a job so as to minimize the number * of sets of consecutive nodes using a best-fit algorithm. - * - * $Id: select_linear.c 14103 2008-05-21 20:31:22Z jette $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -559,7 +557,7 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr, { int i, count = 0, total_jobs, total_run_jobs; struct part_cr_record *part_cr_ptr; - uint32_t job_memory = 0; + uint32_t job_memory_cpu = 0, job_memory_node = 0; bool exclusive; xassert(node_cr_ptr); @@ -572,24 +570,42 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr, else exclusive = true; - if (job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; + if (job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else + job_memory_node = job_ptr->details->job_min_memory; + } for (i = 0; i < node_record_count; i++) { if (!bit_test(bitmap, i)) { bit_clear(jobmap, i); continue; } - - if (select_fast_schedule) { - if ((node_cr_ptr[i].alloc_memory + job_memory) > - node_record_table_ptr[i].config_ptr->real_memory) { - bit_clear(jobmap, i); - continue; + if (job_memory_cpu || job_memory_node) { + uint32_t alloc_mem, job_mem, avail_mem; + alloc_mem = node_cr_ptr[i].alloc_memory; + if (select_fast_schedule) { + avail_mem = node_record_table_ptr[i]. + config_ptr->real_memory; + if (job_memory_cpu) { + job_mem = job_memory_cpu * + node_record_table_ptr[i]. + config_ptr->cpus; + } else + job_mem = job_memory_node; + } else { + avail_mem = node_record_table_ptr[i]. + real_memory; + if (job_memory_cpu) { + job_mem = job_memory_cpu * + node_record_table_ptr[i]. 
+ cpus; + } else + job_mem = job_memory_node; } - } else { - if ((node_cr_ptr[i].alloc_memory + job_memory) > - node_record_table_ptr[i].real_memory) { + if ((alloc_mem + job_mem) >avail_mem) { bit_clear(jobmap, i); continue; } @@ -1132,7 +1148,7 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr, { int i, rc = SLURM_SUCCESS; struct part_cr_record *part_cr_ptr; - uint32_t job_memory = 0; + uint32_t job_memory, job_memory_cpu = 0, job_memory_node = 0; if (node_cr_ptr == NULL) { error("%s: node_cr_ptr not initialized", pre_err); @@ -1140,12 +1156,27 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr, } if (remove_all && job_ptr->details && - job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; + job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else + job_memory_node = job_ptr->details->job_min_memory; + } for (i = 0; i < select_node_cnt; i++) { if (bit_test(job_ptr->node_bitmap, i) == 0) continue; + if (job_memory_cpu == 0) + job_memory = job_memory_node; + else if (select_fast_schedule) { + job_memory = job_memory_cpu * + node_record_table_ptr[i]. 
+ config_ptr->cpus; + } else { + job_memory = job_memory_cpu * + node_record_table_ptr[i].cpus; + } if (node_cr_ptr[i].alloc_memory >= job_memory) node_cr_ptr[i].alloc_memory -= job_memory; else { @@ -1208,7 +1239,7 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr, { int i, rc = SLURM_SUCCESS, exclusive = 0; struct part_cr_record *part_cr_ptr; - uint32_t job_memory = 0; + uint32_t job_memory_cpu = 0, job_memory_node = 0; if (node_cr_ptr == NULL) { error("%s: node_cr_ptr not initialized", pre_err); @@ -1216,15 +1247,32 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr, } if (alloc_all && job_ptr->details && - job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; + job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else + job_memory_node = job_ptr->details->job_min_memory; + } + if (job_ptr->details->shared == 0) exclusive = 1; for (i = 0; i < select_node_cnt; i++) { if (bit_test(job_ptr->node_bitmap, i) == 0) continue; - node_cr_ptr[i].alloc_memory += job_memory; + if (job_memory_cpu == 0) + node_cr_ptr[i].alloc_memory += job_memory_node; + else if (select_fast_schedule) { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i]. 
+ config_ptr->cpus; + } else { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i].cpus; + } if (exclusive) { if (node_cr_ptr[i].exclusive_jobid) { error("select/linear: conflicting exclusive " @@ -1341,7 +1389,7 @@ static void _init_node_cr(void) ListIterator part_iterator; struct job_record *job_ptr; ListIterator job_iterator; - uint32_t job_memory, step_mem; + uint32_t job_memory_cpu, job_memory_node, step_mem = 0; int exclusive, i, node_inx; ListIterator step_iterator; struct step_record *step_ptr; @@ -1375,11 +1423,17 @@ static void _init_node_cr(void) (job_ptr->job_state != JOB_SUSPENDED)) continue; + job_memory_cpu = 0; + job_memory_node = 0; if (job_ptr->details && - job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; - else - job_memory = 0; + job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else { + job_memory_node = job_ptr->details->job_min_memory; + } + } if (job_ptr->details->shared == 0) exclusive = 1; else @@ -1400,7 +1454,18 @@ static void _init_node_cr(void) } node_cr_ptr[i].exclusive_jobid = job_ptr->job_id; } - node_cr_ptr[i].alloc_memory += job_memory; + if (job_memory_cpu == 0) + node_cr_ptr[i].alloc_memory += job_memory_node; + else if (select_fast_schedule) { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i]. 
+ config_ptr->cpus; + } else { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i].cpus; + } part_cr_ptr = node_cr_ptr[i].parts; while (part_cr_ptr) { if (part_cr_ptr->part_ptr != job_ptr->part_ptr) { diff --git a/src/sacct/Makefile.am b/src/sacct/Makefile.am index 4b25bcca7..c51d73435 100644 --- a/src/sacct/Makefile.am +++ b/src/sacct/Makefile.am @@ -1,13 +1,15 @@ # Makefile for sacct AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) bin_PROGRAMS = sacct -sacct_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la +sacct_LDADD = $(top_builddir)/src/api/libslurm.o -ldl + +sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) noinst_HEADERS = sacct.c sacct_SOURCES = sacct.c process.c print.c options.c sacct_stat.c @@ -16,5 +18,4 @@ force: $(sacct_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` -sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/sacct/Makefile.in b/src/sacct/Makefile.in index c3a7b5a9b..5f7bea047 100644 --- a/src/sacct/Makefile.in +++ b/src/sacct/Makefile.in @@ -75,8 +75,7 @@ PROGRAMS = $(bin_PROGRAMS) am_sacct_OBJECTS = sacct.$(OBJEXT) process.$(OBJEXT) print.$(OBJEXT) \ options.$(OBJEXT) sacct_stat.$(OBJEXT) sacct_OBJECTS = $(am_sacct_OBJECTS) -sacct_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la +sacct_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sacct_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sacct_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -266,13 +265,12 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) -sacct_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la - +sacct_LDADD = $(top_builddir)/src/api/libslurm.o -ldl +sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) 
noinst_HEADERS = sacct.c sacct_SOURCES = sacct.c process.c print.c options.c sacct_stat.c -sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) all: all-am .SUFFIXES: @@ -474,6 +472,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/sacct/options.c b/src/sacct/options.c index 4f0a72990..8a924a70e 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -38,6 +38,7 @@ \*****************************************************************************/ #include "src/common/read_config.h" +#include "src/common/parse_time.h" #include "sacct.h" #include <time.h> @@ -76,6 +77,313 @@ void _help_fields_msg(void) return; } +static char *_convert_to_id(char *name, bool gid) +{ + if(gid) { + struct group *grp; + if (!(grp=getgrnam(name))) { + fprintf(stderr, "Invalid group id: %s\n", name); + exit(1); + } + xfree(name); + name = xstrdup_printf("%d", grp->gr_gid); + } else { + struct passwd *pwd; + if (!(pwd=getpwnam(name))) { + fprintf(stderr, "Invalid user id: %s\n", name); + exit(1); + } + xfree(name); + name = xstrdup_printf("%d", pwd->pw_uid); + } + return name; +} + +/* returns number of objects added to list */ +static int _addto_id_char_list(List char_list, char *names, bool gid) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + 
memcpy(name, names+start, (i-start)); + //info("got %s %d", name, i-start); + if (!isdigit((int) *name)) { + name = _convert_to_id( + name, gid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + if (!isdigit((int) *name)) { + name = _convert_to_id(name, gid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} + +/* returns number of objects added to list */ +static int _addto_state_char_list(List char_list, char *names) +{ + int i=0, start=0, c; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + c = decode_state_char(name); + if (c == -1) + fatal("unrecognized job " + "state value"); + xfree(name); + name = xstrdup_printf("%d", c); + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + 
list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + c = decode_state_char(name); + if (c == -1) + fatal("unrecognized job state value"); + xfree(name); + name = xstrdup_printf("%d", c); + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} + +/* returns number of objects added to list */ +static int _addto_job_list(List job_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *dot = NULL; + jobacct_selected_step_t *selected_step = NULL; + jobacct_selected_step_t *curr_step = NULL; + + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!job_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(job_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + char *dot = NULL; + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + + selected_step = xmalloc( + sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = + atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid + 
== selected_step->jobid) + && (curr_step->stepid + == selected_step-> + stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, + selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + list_iterator_reset(itr); + } + i++; + start = i; + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + selected_step = + xmalloc(sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid == selected_step->jobid) + && (curr_step->stepid + == selected_step->stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + } + } + list_iterator_destroy(itr); + return count; +} + void _help_msg(void) { slurm_ctl_conf_t *conf = slurm_conf_lock(); @@ -203,47 +511,26 @@ void _usage(void) void _init_params() { - params.opt_cluster = slurm_get_cluster_name(); /* --cluster */ - params.opt_completion = 0; /* --completion */ - params.opt_dump = 0; /* --dump */ - params.opt_dup = -1; /* --duplicates; +1 = explicitly set */ - params.opt_fdump = 0; /* --formattted_dump */ - params.opt_stat = 0; /* --stat */ - params.opt_gid = -1; /* --gid (-1=wildcard, 0=root) */ - params.opt_header = 1; /* can only be cleared */ - params.opt_help = 0; /* --help */ - params.opt_long = 0; /* --long */ - params.opt_lowmem = 0; /* --low_memory */ - params.opt_purge = 0; /* --purge */ - params.opt_total = 0; /* --total */ - params.opt_uid = -1; /* --uid (-1=wildcard, 0=root) */ - params.opt_uid_set = 0; - params.opt_verbose = 0; /* --verbose */ - params.opt_expire_timespec = NULL; /* --expire= */ - params.opt_field_list = NULL; /* --fields= */ - 
params.opt_filein = NULL; /* --file */ - params.opt_job_list = NULL; /* --jobs */ - params.opt_partition_list = NULL;/* --partitions */ - params.opt_state_list = NULL; /* --states */ + memset(&params, 0, sizeof(sacct_parameters_t)); } int decode_state_char(char *state) { - if (!strcasecmp(state, "p")) + if (!strncasecmp(state, "p", 1)) return JOB_PENDING; /* we should never see this */ - else if (!strcasecmp(state, "r")) + else if (!strncasecmp(state, "r", 1)) return JOB_RUNNING; - else if (!strcasecmp(state, "su")) + else if (!strncasecmp(state, "su", 1)) return JOB_SUSPENDED; - else if (!strcasecmp(state, "cd")) + else if (!strncasecmp(state, "cd", 2)) return JOB_COMPLETE; - else if (!strcasecmp(state, "ca")) + else if (!strncasecmp(state, "ca", 2)) return JOB_CANCELLED; - else if (!strcasecmp(state, "f")) + else if (!strncasecmp(state, "f", 1)) return JOB_FAILED; - else if (!strcasecmp(state, "to")) + else if (!strncasecmp(state, "to", 1)) return JOB_TIMEOUT; - else if (!strcasecmp(state, "nf")) + else if (!strncasecmp(state, "nf", 1)) return JOB_NODE_FAIL; else return -1; // unknown @@ -258,13 +545,26 @@ int get_data(void) ListIterator itr_step = NULL; if(params.opt_completion) { - jobs = g_slurm_jobcomp_get_jobs(selected_steps, - selected_parts, &params); + jobs = g_slurm_jobcomp_get_jobs(params.opt_job_list, + params.opt_partition_list, + &params); return SLURM_SUCCESS; } else { - jobs = jobacct_storage_g_get_jobs(acct_db_conn, - selected_steps, - selected_parts, &params); + acct_job_cond_t *job_cond = xmalloc(sizeof(acct_job_cond_t)); + + job_cond->acct_list = params.opt_acct_list; + job_cond->cluster_list = params.opt_cluster_list; + job_cond->duplicates = params.opt_dup; + job_cond->groupid_list = params.opt_gid_list; + job_cond->partition_list = params.opt_partition_list; + job_cond->step_list = params.opt_job_list; + job_cond->state_list = params.opt_state_list; + job_cond->usage_start = params.opt_begin; + job_cond->usage_end = params.opt_end; + job_cond->userid_list = 
params.opt_uid_list; + + jobs = jobacct_storage_g_get_jobs_cond(acct_db_conn, job_cond); + destroy_acct_job_cond(job_cond); } if (params.opt_fdump) @@ -321,19 +621,22 @@ void parse_command_line(int argc, char **argv) struct stat stat_buf; char *dot = NULL; bool brief_output = FALSE, long_output = FALSE; + bool all_users = 0; static struct option long_options[] = { {"all", 0,0, 'a'}, + {"accounts", 1, 0, 'A'}, + {"begin", 1, 0, 'B'}, {"brief", 0, 0, 'b'}, {"cluster", 1, 0, 'C'}, {"completion", 0, &params.opt_completion, 'c'}, {"duplicates", 0, &params.opt_dup, 1}, {"dump", 0, 0, 'd'}, + {"end", 1, 0, 'E'}, {"expire", 1, 0, 'e'}, {"fields", 1, 0, 'F'}, {"file", 1, 0, 'f'}, {"formatted_dump", 0, 0, 'O'}, - {"stat", 0, 0, 'S'}, {"gid", 1, 0, 'g'}, {"group", 1, 0, 'g'}, {"help", 0, &params.opt_help, 1}, @@ -342,10 +645,11 @@ void parse_command_line(int argc, char **argv) {"long", 0, 0, 'l'}, {"big_logfile", 0, &params.opt_lowmem, 1}, {"noduplicates", 0, &params.opt_dup, 0}, - {"noheader", 0, &params.opt_header, 0}, + {"noheader", 0, &params.opt_noheader, 1}, {"partition", 1, 0, 'p'}, {"purge", 0, 0, 'P'}, {"state", 1, 0, 's'}, + {"stat", 0, 0, 'S'}, {"total", 0, 0, 't'}, {"uid", 1, 0, 'u'}, {"usage", 0, &params.opt_help, 3}, @@ -356,30 +660,40 @@ void parse_command_line(int argc, char **argv) _init_params(); - if ((i=getuid())) - /* default to current user unless root*/ - params.opt_uid = i; + params.opt_uid = getuid(); + params.opt_gid = getgid(); opterr = 1; /* Let getopt report problems to the user */ while (1) { /* now cycle through the command line */ - c = getopt_long(argc, argv, "abcC:de:F:f:g:hj:J:lOPp:s:StUu:Vv", + c = getopt_long(argc, argv, "aA:bB:cC:de:E:F:f:g:hj:lOPp:s:StUu:Vv", long_options, &optionIndex); if (c == -1) break; switch (c) { case 'a': - params.opt_uid = -1; + all_users = 1; + break; + case 'A': + if(!params.opt_acct_list) + params.opt_acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(params.opt_acct_list, optarg); break; case 'b': brief_output = true; 
break; + case 'B': + params.opt_begin = parse_time(optarg); + break; case 'c': params.opt_completion = 1; break; case 'C': - xfree(params.opt_cluster); - params.opt_cluster = xstrdup(optarg); + if(!params.opt_cluster_list) + params.opt_cluster_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(params.opt_cluster_list, optarg); break; case 'd': params.opt_dump = 1; @@ -429,9 +743,11 @@ void parse_command_line(int argc, char **argv) exit(1); } } - params.opt_uid = -1; /* fix default; can't purge by uid */ break; - + + case 'E': + params.opt_end = parse_time(optarg); + break; case 'F': if(params.opt_stat) xfree(params.opt_field_list); @@ -452,18 +768,10 @@ void parse_command_line(int argc, char **argv) break; case 'g': - if (isdigit((int) *optarg)) - params.opt_gid = atoi(optarg); - else { - struct group *grp; - if ((grp=getgrnam(optarg))==NULL) { - fprintf(stderr, - "Invalid group id: %s\n", - optarg); - exit(1); - } - params.opt_gid=grp->gr_gid; - } + if(!params.opt_gid_list) + params.opt_gid_list = + list_create(slurm_destroy_char); + _addto_id_char_list(params.opt_gid_list, optarg, 1); break; case 'h': @@ -478,13 +786,11 @@ void parse_command_line(int argc, char **argv) optarg); exit(1); } - params.opt_job_list = - xrealloc(params.opt_job_list, - (params.opt_job_list==NULL? 0 : - strlen(params.opt_job_list)) + - strlen(optarg) + 1); - strcat(params.opt_job_list, optarg); - strcat(params.opt_job_list, ","); + + if(!params.opt_job_list) + params.opt_job_list = list_create( + destroy_jobacct_selected_step); + _addto_job_list(params.opt_job_list, optarg); break; case 'l': @@ -500,25 +806,20 @@ void parse_command_line(int argc, char **argv) break; case 'p': - params.opt_partition_list = - xrealloc(params.opt_partition_list, - (params.opt_partition_list==NULL? 
0 : - strlen(params.opt_partition_list)) + - strlen(optarg) + 1); - strcat(params.opt_partition_list, optarg); - strcat(params.opt_partition_list, ","); - break; + if(!params.opt_partition_list) + params.opt_partition_list = + list_create(slurm_destroy_char); - case 's': - params.opt_state_list = - xrealloc(params.opt_state_list, - (params.opt_state_list==NULL? 0 : - strlen(params.opt_state_list)) + - strlen(optarg) + 1); - strcat(params.opt_state_list, optarg); - strcat(params.opt_state_list, ","); + slurm_addto_char_list(params.opt_partition_list, + optarg); break; + case 's': + if(!params.opt_state_list) + params.opt_state_list = + list_create(slurm_destroy_char); + _addto_state_char_list(params.opt_state_list, optarg); + break; case 'S': if(!params.opt_field_list) { params.opt_field_list = @@ -538,18 +839,10 @@ void parse_command_line(int argc, char **argv) break; case 'u': - if (isdigit((int) *optarg) || atoi(optarg) == -1) - params.opt_uid = atoi(optarg); - else { - struct passwd *pwd; - if ((pwd=getpwnam(optarg))==NULL) { - fprintf(stderr, - "Invalid user id: %s\n", - optarg); - exit(1); - } - params.opt_uid=pwd->pw_uid; - } + if(!params.opt_uid_list) + params.opt_uid_list = + list_create(slurm_destroy_char); + _addto_id_char_list(params.opt_uid_list, optarg, 0); break; case 'v': @@ -584,14 +877,13 @@ void parse_command_line(int argc, char **argv) /* Now set params.opt_dup, unless they've already done so */ if (params.opt_dup < 0) /* not already set explicitly */ - if (params.opt_job_list) - /* They probably want the most recent job N if - * they requested specific jobs or steps. 
*/ - params.opt_dup = 0; + params.opt_dup = 0; + + if (params.opt_fdump) + params.opt_dup |= FDUMP_FLAG; if (params.opt_verbose) { fprintf(stderr, "Options selected:\n" - "\topt_cluster=%s\n" "\topt_completion=%d\n" "\topt_dump=%d\n" "\topt_dup=%d\n" @@ -600,18 +892,13 @@ void parse_command_line(int argc, char **argv) "\topt_stat=%d\n" "\topt_field_list=%s\n" "\topt_filein=%s\n" - "\topt_header=%d\n" + "\topt_noheader=%d\n" "\topt_help=%d\n" - "\topt_job_list=%s\n" "\topt_long=%d\n" "\topt_lowmem=%d\n" - "\topt_partition_list=%s\n" "\topt_purge=%d\n" - "\topt_state_list=%s\n" "\topt_total=%d\n" - "\topt_uid=%d\n" "\topt_verbose=%d\n", - params.opt_cluster, params.opt_completion, params.opt_dump, params.opt_dup, @@ -620,16 +907,12 @@ void parse_command_line(int argc, char **argv) params.opt_stat, params.opt_field_list, params.opt_filein, - params.opt_header, + params.opt_noheader, params.opt_help, - params.opt_job_list, params.opt_long, params.opt_lowmem, - params.opt_partition_list, params.opt_purge, - params.opt_state_list, params.opt_total, - params.opt_uid, params.opt_verbose); } @@ -665,98 +948,101 @@ void parse_command_line(int argc, char **argv) xfree(acct_type); } - /* specific partitions requested? */ - if (params.opt_partition_list) { - - start = params.opt_partition_list; - while ((end = strstr(start, ",")) && start) { - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - acct_type = xstrdup(start); - list_append(selected_parts, acct_type); - start = end + 1; + /* specific clusters requested? 
*/ + if (params.opt_verbose && params.opt_cluster_list + && list_count(params.opt_cluster_list)) { + fprintf(stderr, "Clusters requested:\n"); + itr = list_iterator_create(params.opt_cluster_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } else if(!params.opt_cluster_list + || !list_count(params.opt_cluster_list)) { + if(!params.opt_cluster_list) + params.opt_cluster_list = + list_create(slurm_destroy_char); + if((start = slurm_get_cluster_name())) + list_append(params.opt_cluster_list, start); + if(params.opt_verbose) { + fprintf(stderr, "Clusters requested:\n"); + fprintf(stderr, "\t: %s\n", start); } - if (params.opt_verbose) { - fprintf(stderr, "Partitions requested:\n"); - itr = list_iterator_create(selected_parts); - while((start = list_next(itr))) - fprintf(stderr, "\t: %s\n", start); - list_iterator_destroy(itr); + } + + if(all_users) { + if(params.opt_uid_list + && list_count(params.opt_uid_list)) { + list_destroy(params.opt_uid_list); + params.opt_uid_list = NULL; + } + if(params.opt_verbose) + fprintf(stderr, "Userids requested:\n\t: all\n"); + } else if (params.opt_verbose && params.opt_uid_list + && list_count(params.opt_uid_list)) { + fprintf(stderr, "Userids requested:\n"); + itr = list_iterator_create(params.opt_uid_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } else if(!params.opt_uid_list + || !list_count(params.opt_uid_list)) { + if(!params.opt_uid_list) + params.opt_uid_list = + list_create(slurm_destroy_char); + start = xstrdup_printf("%u", params.opt_uid); + list_append(params.opt_uid_list, start); + if(params.opt_verbose) { + fprintf(stderr, "Userids requested:\n"); + fprintf(stderr, "\t: %s\n", start); } } + if (params.opt_verbose && params.opt_gid_list + && list_count(params.opt_gid_list)) { + fprintf(stderr, "Groupids requested:\n"); + itr = list_iterator_create(params.opt_gid_list); + while((start = 
list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } + + /* specific partitions requested? */ + if (params.opt_verbose && params.opt_partition_list + && list_count(params.opt_partition_list)) { + fprintf(stderr, "Partitions requested:\n"); + itr = list_iterator_create(params.opt_partition_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } + /* specific jobs requested? */ - if (params.opt_job_list) { - start = params.opt_job_list; - while ((end = strstr(start, ",")) && start) { - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - selected_step = - xmalloc(sizeof(jobacct_selected_step_t)); - list_append(selected_steps, selected_step); - - dot = strstr(start, "."); - if (dot == NULL) { - debug2("No jobstep requested"); - selected_step->step = NULL; - selected_step->stepid = (uint32_t)NO_VAL; - } else { - *dot++ = 0; - selected_step->step = xstrdup(dot); - selected_step->stepid = atoi(dot); - } - selected_step->job = xstrdup(start); - selected_step->jobid = atoi(start); - start = end + 1; - } - if (params.opt_verbose) { - fprintf(stderr, "Jobs requested:\n"); - itr = list_iterator_create(selected_steps); - while((selected_step = list_next(itr))) { - if(selected_step->step) - fprintf(stderr, "\t: %s.%s\n", - selected_step->job, - selected_step->step); - else - fprintf(stderr, "\t: %s\n", - selected_step->job); - } - list_iterator_destroy(itr); + if (params.opt_verbose && params.opt_job_list + && list_count(params.opt_job_list)) { + fprintf(stderr, "Jobs requested:\n"); + itr = list_iterator_create(params.opt_job_list); + while((selected_step = list_next(itr))) { + if(selected_step->stepid != NO_VAL) + fprintf(stderr, "\t: %d.%d\n", + selected_step->jobid, + selected_step->stepid); + else + fprintf(stderr, "\t: %d\n", + selected_step->jobid); } + list_iterator_destroy(itr); } /* specific states (completion state) 
requested? */ - if (params.opt_state_list) { - start = params.opt_state_list; - while ((end = strstr(start, ",")) && start) { - int c; - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - c = decode_state_char(start); - if (c == -1) - fatal("unrecognized job state value"); - selected_state[c] = 1; - start = end + 1; - } - if (params.opt_verbose) { - fprintf(stderr, "States requested:\n"); - for(i=0; i< STATE_COUNT; i++) { - if(selected_state[i]) { - fprintf(stderr, "\t: %s\n", - job_state_string(i)); - break; - } - } + if (params.opt_verbose && params.opt_state_list + && list_count(params.opt_state_list)) { + fprintf(stderr, "States requested:\n"); + itr = list_iterator_create(params.opt_state_list); + while((start = list_next(itr))) { + fprintf(stderr, "\t: %s\n", + job_state_string(atoi(start))); } + list_iterator_destroy(itr); } /* select the output fields */ @@ -1054,18 +1340,13 @@ void do_dump_completion(void) /* do_expire() -- purge expired data from the accounting log file */ -void do_expire(int dummy) +void do_expire() { - if (dummy == NO_VAL) { - /* just load the symbol, don't want to execute */ - slurm_reconfigure(); - } - if(params.opt_completion) - g_slurm_jobcomp_archive(selected_parts, &params); + g_slurm_jobcomp_archive(params.opt_partition_list, &params); else jobacct_storage_g_archive(acct_db_conn, - selected_parts, &params); + params.opt_partition_list, &params); } void do_help(void) @@ -1107,13 +1388,6 @@ void do_list(void) do_jobsteps = 0; itr = list_iterator_create(jobs); while((job = list_next(itr))) { - /* This is really handled when we got the data except - for the filetxt plugin so keep it here. 
- */ - if (params.opt_uid >= 0 && (job->uid != params.opt_uid)) - continue; - if (params.opt_gid >= 0 && (job->gid != params.opt_gid)) - continue; if(job->sacct.min_cpu == NO_VAL) job->sacct.min_cpu = 0; @@ -1125,20 +1399,12 @@ void do_list(void) } if (job->show_full) { - if (params.opt_state_list) { - if(!selected_state[job->state]) - continue; - } print_fields(JOB, job); } if (do_jobsteps && (job->track_steps || !job->show_full)) { itr_step = list_iterator_create(job->steps); while((step = list_next(itr_step))) { - if (params.opt_state_list) { - if(!selected_state[step->state]) - continue; - } if(step->end == 0) step->end = job->end; step->account = job->account; @@ -1165,10 +1431,6 @@ void do_list_completion(void) itr = list_iterator_create(jobs); while((job = list_next(itr))) { - if (params.opt_uid >= 0 && (job->uid != params.opt_uid)) - continue; - if (params.opt_gid >= 0 && (job->gid != params.opt_gid)) - continue; print_fields(JOBCOMP, job); } list_iterator_destroy(itr); @@ -1177,36 +1439,33 @@ void do_list_completion(void) void do_stat() { ListIterator itr = NULL; - uint32_t jobid = 0; uint32_t stepid = 0; jobacct_selected_step_t *selected_step = NULL; - - itr = list_iterator_create(selected_steps); + + if(!params.opt_job_list || !list_count(params.opt_job_list)) { + fprintf(stderr, "No job list given to stat.\n"); + return; + } + + itr = list_iterator_create(params.opt_job_list); while((selected_step = list_next(itr))) { - jobid = atoi(selected_step->job); - if(selected_step->step) - stepid = atoi(selected_step->step); + if(selected_step->stepid != NO_VAL) + stepid = selected_step->stepid; else stepid = 0; - sacct_stat(jobid, stepid); + sacct_stat(selected_step->jobid, stepid); } list_iterator_destroy(itr); } + void sacct_init() { - int i=0; - selected_parts = list_create(slurm_destroy_char); - selected_steps = list_create(destroy_jobacct_selected_step); - for(i=0; i<STATE_COUNT; i++) - selected_state[i] = 0; } void sacct_fini() { if(jobs) 
list_destroy(jobs); - list_destroy(selected_parts); - list_destroy(selected_steps); if(params.opt_completion) g_slurm_jobcomp_fini(); else { diff --git a/src/sacct/sacct.c b/src/sacct/sacct.c index 79c6981c3..f1b968dda 100644 --- a/src/sacct/sacct.c +++ b/src/sacct/sacct.c @@ -257,16 +257,12 @@ int main(int argc, char **argv) "\topt_total=%d\n" "\topt_field_list=%s\n" "\topt_gid=%d\n" - "\topt_uid=%d\n" - "\topt_job_list=%s\n" - "\topt_state_list=%s\n", + "\topt_uid=%d\n", params.opt_long, params.opt_total, params.opt_field_list, params.opt_gid, - params.opt_uid, - params.opt_job_list, - params.opt_state_list); + params.opt_uid); invalidSwitchCombo("--expire", "--brief, --long, --fields, " "--total, --gid, --uid, --jobs, " @@ -288,14 +284,14 @@ int main(int argc, char **argv) do_dump(); break; case SACCT_EXPIRE: - do_expire(0); + do_expire(); break; case SACCT_FDUMP: if(get_data() == SLURM_ERROR) exit(errno); break; case SACCT_LIST: - if (params.opt_header) /* give them something to look */ + if (!params.opt_noheader)/* give them something to look */ _print_header();/* at while we think... */ if(get_data() == SLURM_ERROR) exit(errno); @@ -310,7 +306,7 @@ int main(int argc, char **argv) "in the future please make note this will " "not be supported.\n"); - if (params.opt_header) /* give them something to look */ + if (!params.opt_noheader)/* give them something to look */ _print_header();/* at while we think... 
*/ do_stat(); break; diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index c340bc9ee..23966f2d2 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -159,7 +159,7 @@ int get_data(void); void parse_command_line(int argc, char **argv); void do_dump(void); void do_dump_completion(void); -void do_expire(int dummy); +void do_expire(); void do_help(void); void do_list(void); void do_list_completion(void); diff --git a/src/sacctmgr/Makefile.am b/src/sacctmgr/Makefile.am index 8ff241fbb..800497fbf 100644 --- a/src/sacctmgr/Makefile.am +++ b/src/sacctmgr/Makefile.am @@ -1,11 +1,16 @@ # Makefile for sacctmgr AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) bin_PROGRAMS = sacctmgr +sacctmgr_LDADD = \ + $(top_builddir)/src/api/libslurm.o -ldl\ + $(READLINE_LIBS) + sacctmgr_SOURCES = \ account_functions.c \ association_functions.c \ @@ -13,15 +18,12 @@ sacctmgr_SOURCES = \ common.c \ file_functions.c \ sacctmgr.c sacctmgr.h \ + qos_functions.c \ + txn_functions.c \ user_functions.c -sacctmgr_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ - $(READLINE_LIBS) - sacctmgr_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) force: -$(convenience_libs) : force +$(sacctmgr_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/sacctmgr/Makefile.in b/src/sacctmgr/Makefile.in index 1a61db6f7..c90956531 100644 --- a/src/sacctmgr/Makefile.in +++ b/src/sacctmgr/Makefile.in @@ -73,11 +73,11 @@ PROGRAMS = $(bin_PROGRAMS) am_sacctmgr_OBJECTS = account_functions.$(OBJEXT) \ association_functions.$(OBJEXT) cluster_functions.$(OBJEXT) \ common.$(OBJEXT) file_functions.$(OBJEXT) sacctmgr.$(OBJEXT) \ + qos_functions.$(OBJEXT) txn_functions.$(OBJEXT) \ user_functions.$(OBJEXT) sacctmgr_OBJECTS = $(am_sacctmgr_OBJECTS) am__DEPENDENCIES_1 = -sacctmgr_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la \ +sacctmgr_DEPENDENCIES = 
$(top_builddir)/src/api/libslurm.o \ $(am__DEPENDENCIES_1) sacctmgr_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sacctmgr_LDFLAGS) \ @@ -267,7 +267,12 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) +sacctmgr_LDADD = \ + $(top_builddir)/src/api/libslurm.o -ldl\ + $(READLINE_LIBS) + sacctmgr_SOURCES = \ account_functions.c \ association_functions.c \ @@ -275,13 +280,10 @@ sacctmgr_SOURCES = \ common.c \ file_functions.c \ sacctmgr.c sacctmgr.h \ + qos_functions.c \ + txn_functions.c \ user_functions.c -sacctmgr_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ - $(READLINE_LIBS) - sacctmgr_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) all: all-am @@ -359,7 +361,9 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cluster_functions.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/common.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/file_functions.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/qos_functions.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sacctmgr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/txn_functions.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/user_functions.Po@am__quote@ .c.o: @@ -486,6 +490,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) @@ -569,7 +574,7 @@ uninstall-am: uninstall-binPROGRAMS force: -$(convenience_libs) : force +$(sacctmgr_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index 43df7c633..6a83719b5 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -46,57 +46,104 @@ static int _set_cond(int *start, int argc, char *argv[], int a_set = 0; int u_set = 0; int end = 0; + List qos_list = NULL; + + if(!acct_cond) { + exit_code=1; + fprintf(stderr, "No acct_cond given"); + return -1; + } + + if(!acct_cond->assoc_cond) { + acct_cond->assoc_cond = + xmalloc(sizeof(acct_association_cond_t)); + acct_cond->assoc_cond->fairshare = NO_VAL; + acct_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; + acct_cond->assoc_cond->max_jobs = NO_VAL; + acct_cond->assoc_cond->max_nodes_per_job = NO_VAL; + acct_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; + } for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; - } else if (strncasecmp (argv[i], "WithAssoc", 5) == 0) { + } else if (!strncasecmp (argv[i], "WithAssoc", 5)) { acct_cond->with_assocs = 1; - } else if (strncasecmp (argv[i], "WithCoordinators", 5) == 0) { + } else if (!strncasecmp (argv[i], "WithCoordinators", 5)) { acct_cond->with_coords = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(acct_cond->acct_list, argv[i]); - addto_char_list(acct_cond->assoc_cond->acct_list, - argv[i]); - u_set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(acct_cond->assoc_cond->cluster_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "Descriptions", 1) == 0) { - addto_char_list(acct_cond->description_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if(!end + || !strncasecmp (argv[i], "Names", 1) + || !strncasecmp (argv[i], "Accouts", 1)) { + if(!acct_cond->assoc_cond->acct_list) { + acct_cond->assoc_cond->acct_list = + 
list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + acct_cond->assoc_cond->acct_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + if(!acct_cond->assoc_cond->cluster_list) { + acct_cond->assoc_cond->cluster_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + acct_cond->assoc_cond->cluster_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "Descriptions", 1)) { + if(!acct_cond->description_list) { + acct_cond->description_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(acct_cond->description_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0 - || strncasecmp (argv[i], "Accouts", 1) == 0) { - addto_char_list(acct_cond->acct_list, argv[i]+end); - addto_char_list(acct_cond->assoc_cond->acct_list, - argv[i]); - u_set = 1; - } else if (strncasecmp (argv[i], "Organizations", 1) == 0) { - addto_char_list(acct_cond->organization_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Parent", 1) == 0) { + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Organizations", 1)) { + if(!acct_cond->organization_list) { + acct_cond->organization_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(acct_cond->organization_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Parent", 1)) { acct_cond->assoc_cond->parent_acct = strip_quotes(argv[i]+end, NULL); a_set = 1; - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - acct_cond->qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!acct_cond->qos_list) { + acct_cond->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + 
addto_qos_char_list(acct_cond->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" " Use keyword 'set' to modify " "SLURM_PRINT_VALUE\n", argv[i]); } } + + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(a_set) @@ -115,61 +162,86 @@ static int _set_rec(int *start, int argc, char *argv[], int u_set = 0; int a_set = 0; int end = 0; + List qos_list = NULL; for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i--; break; } else if(!end && !strncasecmp(argv[i], "set", 3)) { continue; } else if(!end) { - printf(" Bad format on %s: End your option with " + exit_code=1; + fprintf(stderr, + " Bad format on %s: End your option with " "an '=' sign\n", argv[i]); - } else if (strncasecmp (argv[i], "Description", 1) == 0) { + } else if (!strncasecmp (argv[i], "Description", 1)) { acct->description = strip_quotes(argv[i]+end, NULL); u_set = 1; - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &assoc->fairshare, - "FairShare") == SLURM_SUCCESS) + "FairShare") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSec", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSec", 4)) { if (get_uint(argv[i]+end, &assoc->max_cpu_secs_per_job, - "MaxCPUSec") == SLURM_SUCCESS) + "MaxCPUSec") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &assoc->max_jobs, - "MaxJobs") == SLURM_SUCCESS) + "MaxJobs") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &assoc->max_nodes_per_job, - "MaxNodes") == SLURM_SUCCESS) + "MaxNodes") == 
SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { assoc->max_wall_duration_per_job = (uint32_t) mins; a_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Organization", 1) == 0) { + } else if (!strncasecmp (argv[i], "Organization", 1)) { acct->organization = strip_quotes(argv[i]+end, NULL); u_set = 1; - } else if (strncasecmp (argv[i], "Parent", 1) == 0) { + } else if (!strncasecmp (argv[i], "Parent", 1)) { assoc->parent_acct = strip_quotes(argv[i]+end, NULL); a_set = 1; - } else if (strncasecmp (argv[i], "QosLevel=", 1) == 0) { - acct->qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel=", 1)) { + int option = 0; + if(!acct->qos_list) { + acct->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + if(end > 2 && argv[i][end-1] == '=' + && (argv[i][end-2] == '+' + || argv[i][end-2] == '-')) + option = (int)argv[i][end-2]; + + addto_qos_char_list(acct->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown option: %s\n" - " Use keyword 'where' to modify condition\n", - argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n" + " Use keyword 'where' to modify condition\n", + argv[i]); } } + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(u_set && a_set) @@ -196,7 +268,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) char *parent = NULL; char *cluster = NULL; char *name = NULL; - acct_qos_level_t qos = ACCT_QOS_NOTSET; + List add_qos_list = NULL; + List qos_list = NULL; List acct_list = NULL; List assoc_list = NULL; List local_assoc_list = NULL; @@ -213,46 +286,60 @@ extern int sacctmgr_add_account(int argc, char *argv[]) for (i=0; i<argc; 
i++) { int end = parse_option_end(argv[i]); if(!end) { - addto_char_list(name_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Cluster", 1) == 0) { - addto_char_list(cluster_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Description", 1) == 0) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Cluster", 1)) { + slurm_addto_char_list(cluster_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Description", 1)) { description = strip_quotes(argv[i]+end, NULL); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &fairshare, "FairShare") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { if (get_uint(argv[i]+end, &max_cpu_secs_per_job, "MaxCPUSecs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &max_jobs, "MaxJobs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { max_wall_duration_per_job = (uint32_t) mins; limit_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(name_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Organization", 1) == 0) { + } else if (!strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Organization", 1)) 
{ organization = strip_quotes(argv[i]+end, NULL); - } else if (strncasecmp (argv[i], "Parent", 1) == 0) { + } else if (!strncasecmp (argv[i], "Parent", 1)) { parent = strip_quotes(argv[i]+end, NULL); - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!add_qos_list) { + add_qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + addto_qos_char_list(add_qos_list, qos_list, + argv[i]+end, option); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } @@ -262,21 +349,26 @@ extern int sacctmgr_add_account(int argc, char *argv[]) xfree(parent); xfree(description); xfree(organization); - printf(" Need name of account to add.\n"); + exit_code=1; + fprintf(stderr, " Need name of account to add.\n"); return SLURM_SUCCESS; } else { acct_account_cond_t account_cond; + acct_association_cond_t assoc_cond; memset(&account_cond, 0, sizeof(acct_account_cond_t)); - account_cond.acct_list = name_list; + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.acct_list = name_list; + account_cond.assoc_cond = &assoc_cond; local_account_list = acct_storage_g_get_accounts( db_conn, &account_cond); } if(!local_account_list) { - printf(" Problem getting accounts from database. " - "Contact your admin.\n"); + exit_code=1; + fprintf(stderr, " Problem getting accounts from database. " + "Contact your admin.\n"); list_destroy(name_list); list_destroy(cluster_list); xfree(parent); @@ -294,7 +386,9 @@ extern int sacctmgr_add_account(int argc, char *argv[]) temp_list = acct_storage_g_get_clusters(db_conn, NULL); if(!cluster_list) { - printf(" Problem getting clusters from database. " + exit_code=1; + fprintf(stderr, + " Problem getting clusters from database. 
" "Contact your admin.\n"); list_destroy(name_list); list_destroy(cluster_list); @@ -312,8 +406,11 @@ extern int sacctmgr_add_account(int argc, char *argv[]) list_iterator_destroy(itr_c); if(!list_count(cluster_list)) { - printf(" Can't add accounts, no cluster defined yet.\n" - " Please contact your administrator.\n"); + exit_code=1; + fprintf(stderr, + " Can't add accounts, no cluster " + "defined yet.\n" + " Please contact your administrator.\n"); list_destroy(name_list); list_destroy(cluster_list); list_destroy(local_account_list); @@ -342,7 +439,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) break; } if(!cluster_rec) { - printf(" error: This cluster '%s' " + exit_code=1; + fprintf(stderr, " This cluster '%s' " "doesn't exist.\n" " Contact your admin " "to add it to accounting.\n", @@ -380,7 +478,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) db_conn, &assoc_cond); list_destroy(assoc_cond.acct_list); if(!local_assoc_list) { - printf(" Problem getting associations from database. " + exit_code=1; + fprintf(stderr, " Problem getting associations from database. 
" "Contact your admin.\n"); list_destroy(name_list); list_destroy(cluster_list); @@ -409,8 +508,18 @@ extern int sacctmgr_add_account(int argc, char *argv[]) acct->organization = xstrdup(parent); else acct->organization = xstrdup(name); - - acct->qos = qos; + if(add_qos_list && list_count(add_qos_list)) { + char *tmp_qos = NULL; + ListIterator qos_itr = + list_iterator_create(add_qos_list); + acct->qos_list = + list_create(slurm_destroy_char); + while((tmp_qos = list_next(qos_itr))) { + list_append(acct->qos_list, + xstrdup(tmp_qos)); + } + list_iterator_destroy(qos_itr); + } xstrfmtcat(acct_str, " %s\n", name); list_append(acct_list, acct); } @@ -424,7 +533,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) } if(!sacctmgr_find_account_base_assoc_from_list( local_assoc_list, parent, cluster)) { - printf(" error: Parent account '%s' " + exit_code=1; + fprintf(stderr, " Parent account '%s' " "doesn't exist on " "cluster %s\n" " Contact your admin " @@ -467,7 +577,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) printf(" Nothing new added.\n"); goto end_it; } else if(!assoc_str) { - printf(" Error: no associations created.\n"); + exit_code=1; + fprintf(stderr, " No associations created.\n"); goto end_it; } @@ -485,8 +596,14 @@ extern int sacctmgr_add_account(int argc, char *argv[]) printf(" Organization = %s\n", "Parent/Account Name"); - if(qos != ACCT_QOS_NOTSET) - printf(" Qos = %s\n", acct_qos_str(qos)); + if(add_qos_list) { + char *temp_char = get_qos_complete_str( + qos_list, add_qos_list); + if(temp_char) { + printf(" Qos = %s\n", temp_char); + xfree(temp_char); + } + } xfree(acct_str); } @@ -538,7 +655,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) rc = acct_storage_g_add_associations(db_conn, my_uid, assoc_list); } else { - printf(" error: Problem adding accounts\n"); + exit_code=1; + fprintf(stderr, " Problem adding accounts\n"); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; @@ -553,11 +671,15 @@ extern int 
sacctmgr_add_account(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: Problem adding account associations\n"); + exit_code=1; + fprintf(stderr, + " error: Problem adding account associations\n"); rc = SLURM_ERROR; } end_it: + if(add_qos_list) + list_destroy(add_qos_list); list_destroy(acct_list); list_destroy(assoc_list); @@ -580,6 +702,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) acct_account_rec_t *acct = NULL; acct_association_rec_t *assoc = NULL; char *object; + List qos_list = NULL; print_field_t *field = NULL; @@ -599,7 +722,6 @@ extern int sacctmgr_list_account(int argc, char *argv[]) PRINT_MAXW, PRINT_ORG, PRINT_QOS, - PRINT_QOS_GOLD, PRINT_QOS_RAW, PRINT_PID, PRINT_PNAME, @@ -607,37 +729,25 @@ extern int sacctmgr_list_account(int argc, char *argv[]) PRINT_USER }; - acct_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->description_list = list_create(slurm_destroy_char); - acct_cond->organization_list = list_create(slurm_destroy_char); acct_cond->with_assocs = with_assoc_flag; - acct_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - acct_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, acct_cond, format_list); - if(!list_count(format_list)) { - addto_char_list(format_list, "A,D,O,Q"); + if(exit_code) { + destroy_acct_account_cond(acct_cond); + list_destroy(format_list); + return SLURM_ERROR; + } else if(!list_count(format_list)) { + slurm_addto_char_list(format_list, "A,D,O,Q"); if(acct_cond->with_assocs) - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Cl,ParentN,U,F,MaxC,MaxJ,MaxN,MaxW"); if(acct_cond->with_coords) - addto_char_list(format_list, "Coord"); + slurm_addto_char_list(format_list, 
"Coord"); } - acct_list = acct_storage_g_get_accounts(db_conn, acct_cond); - destroy_acct_account_cond(acct_cond); - if(!acct_list) { - printf(" Problem with query.\n"); - list_destroy(format_list); - return SLURM_ERROR; - } print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -698,21 +808,16 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field->name = xstrdup("Org"); field->len = 20; field->print_routine = print_fields_str; - } else if(!strncasecmp("QOSGOLD", object, 4)) { - field->type = PRINT_QOS_GOLD; - field->name = xstrdup("QOS_GOLD"); - field->len = 7; - field->print_routine = print_fields_uint; } else if(!strncasecmp("QOSRAW", object, 4)) { field->type = PRINT_QOS_RAW; field->name = xstrdup("QOS_RAW"); - field->len = 7; - field->print_routine = print_fields_uint; + field->len = 10; + field->print_routine = print_fields_char_list; } else if(!strncasecmp("QOS", object, 1)) { field->type = PRINT_QOS; field->name = xstrdup("QOS"); - field->len = 9; - field->print_routine = print_fields_str; + field->len = 20; + field->print_routine = sacctmgr_print_qos_list; } else if(!strncasecmp("ParentID", object, 7)) { field->type = PRINT_PID; field->name = xstrdup("Par ID"); @@ -729,7 +834,8 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -738,6 +844,22 @@ extern int sacctmgr_list_account(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_account_cond(acct_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + acct_list = acct_storage_g_get_accounts(db_conn, acct_cond); + destroy_acct_account_cond(acct_cond); + + if(!acct_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + 
return SLURM_ERROR; + } + itr = list_iterator_create(acct_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -752,107 +874,97 @@ extern int sacctmgr_list_account(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, acct->name); break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->cluster); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, acct->coordinators); break; case PRINT_DESC: field->print_routine( - SLURM_PRINT_VALUE, field, acct->description); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->fairshare); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->id); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_cpu_secs_per_job); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->max_jobs); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_wall_duration_per_job); break; case PRINT_ORG: field->print_routine( - SLURM_PRINT_VALUE, field, acct->organization); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str( - acct->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, - field, - acct->qos-1); + qos_list, + acct->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct->qos); + qos_list, + acct->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_id); break; case PRINT_PNAME: field->print_routine( - SLURM_PRINT_VALUE, field, 
assoc->parent_acct); break; case PRINT_PART: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->partition); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->user); break; default: @@ -868,93 +980,85 @@ extern int sacctmgr_list_account(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, acct->name); break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, acct->coordinators); break; case PRINT_DESC: field->print_routine( - SLURM_PRINT_VALUE, field, acct->description); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_ORG: field->print_routine( - SLURM_PRINT_VALUE, field, acct->organization); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, - field, acct_qos_str(acct->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, field, - acct->qos-1); + field, qos_list, + acct->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct->qos); + field, qos_list, + acct->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_PNAME: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_PART: field->print_routine( - 
SLURM_PRINT_VALUE, field, NULL); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; default: @@ -986,19 +1090,6 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) int cond_set = 0, rec_set = 0, set = 0; List ret_list = NULL; - acct_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->description_list = list_create(slurm_destroy_char); - acct_cond->organization_list = list_create(slurm_destroy_char); - - acct_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - acct_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->fairshare = NO_VAL; - acct_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - acct_cond->assoc_cond->max_jobs = NO_VAL; - acct_cond->assoc_cond->max_nodes_per_job = NO_VAL; - acct_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; - assoc->fairshare = NO_VAL; assoc->max_cpu_secs_per_job = NO_VAL; assoc->max_jobs = NO_VAL; @@ -1006,10 +1097,10 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) assoc->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i++; cond_set = _set_cond(&i, argc, argv, acct_cond, NULL); - } else if (strncasecmp (argv[i], "Set", 3) == 0) { + } else if (!strncasecmp (argv[i], "Set", 3)) { i++; rec_set = _set_rec(&i, argc, argv, acct, assoc); } else { @@ -1018,7 +1109,8 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) } if(!rec_set) { - printf(" You didn't give me anything to set\n"); + exit_code=1; + fprintf(stderr, " You didn't give me anything to set\n"); destroy_acct_account_cond(acct_cond); destroy_acct_account_rec(acct); destroy_acct_association_rec(assoc); @@ -1050,11 +1142,13 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) printf(" %s\n", object); } list_iterator_destroy(itr); + set = 1; } else if(ret_list) { 
printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1079,7 +1173,8 @@ assoc_start: } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1110,25 +1205,40 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) xmalloc(sizeof(acct_account_cond_t)); int i=0; List ret_list = NULL; + ListIterator itr = NULL; int set = 0; - acct_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->description_list = list_create(slurm_destroy_char); - acct_cond->organization_list = list_create(slurm_destroy_char); - - acct_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - acct_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->partition_list = - list_create(slurm_destroy_char); - if(!(set = _set_cond(&i, argc, argv, acct_cond, NULL))) { - printf(" No conditions given to remove, not executing.\n"); + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); destroy_acct_account_cond(acct_cond); return SLURM_ERROR; } + /* check to see if person is trying to remove root account. This is + * bad, and should not be allowed outside of deleting a cluster. 
+ */ + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { + char *tmp_char = NULL; + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, "root")) + break; + } + list_iterator_destroy(itr); + if(tmp_char) { + exit_code=1; + fprintf(stderr, " You are not allowed to remove " + "the root account.\n" + " Use remove cluster instead.\n"); + destroy_acct_account_cond(acct_cond); + return SLURM_ERROR; + } + } + notice_thread_init(); if(set == 1) { ret_list = acct_storage_g_remove_accounts( @@ -1161,7 +1271,8 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing deleted\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } diff --git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c index 9a754fdef..461a19fdb 100644 --- a/src/sacctmgr/association_functions.c +++ b/src/sacctmgr/association_functions.c @@ -123,44 +123,62 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (!end && strncasecmp (argv[i], "Tree", 4) == 0) { + if (!end && !strncasecmp (argv[i], "Tree", 4)) { tree_display = 1; + } else if (!end && !strncasecmp (argv[i], "WithDeleted", 5)) { + association_cond->with_deleted = 1; + } else if (!end && !strncasecmp (argv[i], "WOPInfo", 4)) { + association_cond->without_parent_info = 1; + } else if (!end && !strncasecmp (argv[i], "WOPLimits", 4)) { + association_cond->without_parent_limits = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(association_cond->id_list, argv[i]); + } else if(!end || !strncasecmp (argv[i], "Id", 1) + || !strncasecmp (argv[i], "Associations", 2)) { + if(!association_cond->id_list) + association_cond->id_list = + 
list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->id_list, + argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Id", 1) == 0) { - addto_char_list(association_cond->id_list, argv[i]+end); - set = 1; - } else if (strncasecmp (argv[i], "Associations", 2) == 0) { - addto_char_list(association_cond->id_list, argv[i]+end); - set = 1; - } else if (strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(association_cond->user_list, + } else if (!strncasecmp (argv[i], "Users", 1)) { + if(!association_cond->user_list) + association_cond->user_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->user_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(association_cond->acct_list, + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + if(!association_cond->acct_list) + association_cond->acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->acct_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(association_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + if(!association_cond->cluster_list) + association_cond->cluster_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->cluster_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Partitions", 4) == 0) { - addto_char_list(association_cond->partition_list, + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Partitions", 4)) { + if(!association_cond->partition_list) + association_cond->partition_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->partition_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], 
"Parent", 4) == 0) { + } else if (!strncasecmp (argv[i], "Parent", 4)) { association_cond->parent_acct = strip_quotes(argv[i]+end, NULL); set = 1; } else { - printf(" Unknown condition: %s\n", argv[i]); + exit_code = 1; + fprintf(stderr, " Unknown condition: %s\n", argv[i]); } } (*start) = i; @@ -332,6 +350,7 @@ extern int sacctmgr_list_association(int argc, char *argv[]) PRINT_CLUSTER, PRINT_FAIRSHARE, PRINT_ID, + PRINT_LFT, PRINT_MAXC, PRINT_MAXJ, PRINT_MAXN, @@ -339,31 +358,22 @@ extern int sacctmgr_list_association(int argc, char *argv[]) PRINT_PID, PRINT_PNAME, PRINT_PART, + PRINT_RGT, PRINT_USER }; - assoc_cond->id_list = list_create(slurm_destroy_char); - assoc_cond->user_list = list_create(slurm_destroy_char); - assoc_cond->acct_list = list_create(slurm_destroy_char); - assoc_cond->cluster_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, assoc_cond, format_list); - assoc_list = acct_storage_g_get_associations(db_conn, assoc_cond); - destroy_acct_association_cond(assoc_cond); - - if(!assoc_list) { - printf(" Problem with query.\n"); + if(exit_code) { + destroy_acct_association_cond(assoc_cond); list_destroy(format_list); return SLURM_ERROR; - } + } else if(!list_count(format_list)) + slurm_addto_char_list(format_list, + "C,A,U,F,MaxC,MaxJ,MaxN,MaxW"); + print_fields_list = list_create(destroy_print_field); - first_list = assoc_list; - assoc_list = _sort_assoc_list(first_list); - if(!list_count(format_list)) - addto_char_list(format_list, "C,A,U,F,MaxC,MaxJ,MaxN,MaxW"); - itr = list_iterator_create(format_list); while((object = list_next(itr))) { field = xmalloc(sizeof(print_field_t)); @@ -390,6 +400,11 @@ extern int sacctmgr_list_association(int argc, char *argv[]) field->name = xstrdup("ID"); field->len = 6; field->print_routine = print_fields_uint; + } else if(!strncasecmp("LFT", object, 1)) { + field->type = PRINT_LFT; + field->name = xstrdup("LFT"); + field->len = 6; + field->print_routine = print_fields_uint; } else 
if(!strncasecmp("MaxCPUSecs", object, 4)) { field->type = PRINT_MAXC; field->name = xstrdup("MaxCPUSecs"); @@ -425,13 +440,20 @@ extern int sacctmgr_list_association(int argc, char *argv[]) field->name = xstrdup("Partition"); field->len = 10; field->print_routine = print_fields_str; + } else if(!strncasecmp("RGT", object, 1)) { + field->type = PRINT_RGT; + field->name = xstrdup("RGT"); + field->len = 6; + field->print_routine = print_fields_uint; } else if(!strncasecmp("User", object, 1)) { field->type = PRINT_USER; field->name = xstrdup("User"); field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); + exit(1); xfree(field); continue; } @@ -440,6 +462,24 @@ extern int sacctmgr_list_association(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_association_cond(assoc_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + assoc_list = acct_storage_g_get_associations(db_conn, assoc_cond); + destroy_acct_association_cond(assoc_cond); + + if(!assoc_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + first_list = assoc_list; + assoc_list = _sort_assoc_list(first_list); + itr = list_iterator_create(assoc_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -477,53 +517,61 @@ extern int sacctmgr_list_association(int argc, char *argv[]) } else { print_acct = assoc->acct; } - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, print_acct); break; case PRINT_CLUSTER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->cluster); break; case PRINT_FAIRSHARE: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->fairshare); break; case PRINT_ID: - 
field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->id); break; + case PRINT_LFT: + field->print_routine(field, + assoc->lft); + break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_cpu_secs_per_job); break; case PRINT_MAXJ: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_jobs); break; case PRINT_MAXN: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_wall_duration_per_job); break; case PRINT_PID: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->parent_id); break; case PRINT_PNAME: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->parent_acct); break; case PRINT_PART: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->partition); break; + case PRINT_RGT: + field->print_routine(field, + assoc->rgt); + break; case PRINT_USER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->user); break; default: diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index 2c0ac1653..3a39ddb32 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -49,24 +49,25 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(cluster_list, argv[i]); - set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if(!end || !strncasecmp (argv[i], "Names", 1)) { + if(cluster_list) { + if(slurm_addto_char_list(cluster_list, + argv[i]+end)) + set = 1; + } + } 
else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(cluster_list, - argv[i]+end); - set = 1; + slurm_addto_char_list(format_list, argv[i]+end); } else { - printf(" Unknown condition: %s\n" - "Use keyword set to modify value\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" + "Use keyword set to modify value\n", argv[i]); + break; } } (*start) = i; @@ -83,46 +84,51 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i--; break; } else if(!end && !strncasecmp(argv[i], "set", 3)) { continue; } else if(!end) { - printf(" Bad format on %s: End your option with " + exit_code=1; + fprintf(stderr, + " Bad format on %s: End your option with " "an '=' sign\n", argv[i]); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &assoc->fairshare, "FairShare") == SLURM_SUCCESS) set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &assoc->max_jobs, "MaxJobs") == SLURM_SUCCESS) set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &assoc->max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { assoc->max_wall_duration_per_job = (uint32_t) mins; set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 
0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { if (get_uint(argv[i]+end, &assoc->max_cpu_secs_per_job, "MaxCPUSecs") == SLURM_SUCCESS) set = 1; } else { - printf(" Unknown option: %s\n" - " Use keyword 'where' to modify condition\n", - argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n" + " Use keyword 'where' to modify condition\n", + argv[i]); } } (*start) = i; @@ -151,38 +157,45 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); if(!end) { - addto_char_list(name_list, argv[i]+end); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "FairShare", 1)) { fairshare = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { max_cpu_secs_per_job = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs=", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs=", 4)) { max_jobs = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { max_nodes_per_job = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { max_wall_duration_per_job = (uint32_t) mins; limit_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(name_list, argv[i]+end); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } - 
if(!list_count(name_list)) { + if(exit_code) { list_destroy(name_list); - printf(" Need name of cluster to add.\n"); + return SLURM_ERROR; + } else if(!list_count(name_list)) { + list_destroy(name_list); + exit_code=1; + fprintf(stderr, " Need name of cluster to add.\n"); return SLURM_ERROR; } else { List temp_list = NULL; @@ -194,8 +207,10 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) temp_list = acct_storage_g_get_clusters(db_conn, &cluster_cond); if(!temp_list) { - printf(" Problem getting clusters from database. " - "Contact your admin.\n"); + exit_code=1; + fprintf(stderr, + " Problem getting clusters from database. " + "Contact your admin.\n"); return SLURM_ERROR; } @@ -293,7 +308,8 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: problem adding clusters\n"); + exit_code=1; + fprintf(stderr, " Problem adding clusters\n"); } end_it: list_destroy(cluster_list); @@ -332,12 +348,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) cluster_cond->cluster_list = list_create(slurm_destroy_char); _set_cond(&i, argc, argv, cluster_cond->cluster_list, format_list); - - cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); - destroy_acct_cluster_cond(cluster_cond); - - if(!cluster_list) { - printf(" Problem with query.\n"); + if(exit_code) { + destroy_acct_cluster_cond(cluster_cond); list_destroy(format_list); return SLURM_ERROR; } @@ -345,8 +357,9 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); if(!list_count(format_list)) { - addto_char_list(format_list, - "Cl,Controlh,Controlp,F,MaxC,MaxJ,MaxN,MaxW"); + slurm_addto_char_list(format_list, + "Cl,Controlh,Controlp,F,MaxC," + "MaxJ,MaxN,MaxW"); } itr = list_iterator_create(format_list); @@ -393,7 +406,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) field->len = 11; field->print_routine = print_fields_time; } else { - printf("Unknown 
field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -402,6 +416,22 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_cluster_cond(cluster_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); + destroy_acct_cluster_cond(cluster_cond); + + if(!cluster_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + itr = list_iterator_create(cluster_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -410,40 +440,40 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) while((field = list_next(itr2))) { switch(field->type) { case PRINT_CLUSTER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, cluster->name); break; case PRINT_CHOST: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, cluster->control_host); break; case PRINT_CPORT: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, cluster->control_port); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_fairshare); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_max_cpu_secs_per_job); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_max_jobs); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster-> default_max_wall_duration_per_job); break; @@ -489,12 +519,12 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) assoc->max_wall_duration_per_job = NO_VAL; 
for (i=0; i<argc; i++) { - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i++; if(_set_cond(&i, argc, argv, assoc_cond->cluster_list, NULL)) cond_set = 1; - } else if (strncasecmp (argv[i], "Set", 3) == 0) { + } else if (!strncasecmp (argv[i], "Set", 3)) { i++; if(_set_rec(&i, argc, argv, assoc)) rec_set = 1; @@ -506,7 +536,8 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } if(!rec_set) { - printf(" You didn't give me anything to set\n"); + exit_code=1; + fprintf(stderr, " You didn't give me anything to set\n"); destroy_acct_association_rec(assoc); destroy_acct_association_cond(assoc_cond); return SLURM_ERROR; @@ -518,6 +549,10 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) destroy_acct_association_cond(assoc_cond); return SLURM_SUCCESS; } + } else if(exit_code) { + destroy_acct_association_rec(assoc); + destroy_acct_association_cond(assoc_cond); + return SLURM_ERROR; } printf(" Setting\n"); @@ -573,7 +608,8 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -606,7 +642,9 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) cluster_cond->cluster_list = list_create(slurm_destroy_char); if(!_set_cond(&i, argc, argv, cluster_cond->cluster_list, NULL)) { - printf(" No conditions given to remove, not executing.\n"); + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); destroy_acct_cluster_cond(cluster_cond); return SLURM_ERROR; } @@ -639,7 +677,8 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing deleted\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -666,32 +705,40 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) int end = 
parse_option_end(argv[i]); if(!end) { if(cluster_name) { - printf(" Can only do one cluster at a time. " + exit_code=1; + fprintf(stderr, + " Can only do one cluster at a time. " "Already doing %s\n", cluster_name); continue; } cluster_name = xstrdup(argv[i]+end); - } else if (strncasecmp (argv[i], "File", 1) == 0) { + } else if (!strncasecmp (argv[i], "File", 1)) { if(file_name) { - printf(" File name already set to %s\n", - file_name); + exit_code=1; + fprintf(stderr, + " File name already set to %s\n", + file_name); continue; } file_name = xstrdup(argv[i]+end); - } else if (strncasecmp (argv[i], "Name", 1) == 0) { + } else if (!strncasecmp (argv[i], "Name", 1)) { if(cluster_name) { - printf(" Can only do one cluster at a time. " - "Already doing %s\n", cluster_name); + exit_code=1; + fprintf(stderr, + " Can only do one cluster at a time. " + "Already doing %s\n", cluster_name); continue; } cluster_name = xstrdup(argv[i]+end); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } if(!cluster_name) { - printf(" We need a cluster to dump.\n"); + exit_code=1; + fprintf(stderr, " We need a cluster to dump.\n"); return SLURM_ERROR; } @@ -709,11 +756,13 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) list_destroy(assoc_cond.cluster_list); if(!assoc_list) { - printf(" Problem with query.\n"); + exit_code=1; + fprintf(stderr, " Problem with query.\n"); xfree(cluster_name); return SLURM_ERROR; } else if(!list_count(assoc_list)) { - printf(" Cluster %s returned nothing.", cluster_name); + exit_code=1; + fprintf(stderr, " Cluster %s returned nothing.", cluster_name); xfree(cluster_name); return SLURM_ERROR; } @@ -753,12 +802,14 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) "# User - lipari:MaxNodesPerJob=2:MaxJobs=3:" "MaxProcSecondsPerJob=4:FairShare=1:" "MaxWallDurationPerJob=1\n") < 0) { - error("Can't write to file"); + exit_code=1; + fprintf(stderr, "Can't write to 
file"); return SLURM_ERROR; } if(fprintf(fd, "Cluster - %s\n", cluster_name) < 0) { - error("Can't write to file"); + exit_code=1; + fprintf(stderr, "Can't write to file"); return SLURM_ERROR; } diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c index 68f2960c1..d043fcdf9 100644 --- a/src/sacctmgr/common.c +++ b/src/sacctmgr/common.c @@ -92,14 +92,16 @@ extern void destroy_sacctmgr_assoc(void *object) extern int parse_option_end(char *option) { int end = 0; - + if(!option) return 0; while(option[end] && option[end] != '=') end++; + if(!option[end]) return 0; + end++; return end; } @@ -143,57 +145,6 @@ extern char *strip_quotes(char *option, int *increased) return meat; } -extern void addto_char_list(List char_list, char *names) -{ - int i=0, start=0; - char *name = NULL, *tmp_char = NULL; - ListIterator itr = list_iterator_create(char_list); - - if(names && char_list) { - if (names[i] == '\"' || names[i] == '\'') - i++; - start = i; - while(names[i]) { - if(names[i] == '\"' || names[i] == '\'') - break; - else if(names[i] == ',') { - if((i-start) > 0) { - name = xmalloc((i-start+1)); - memcpy(name, names+start, (i-start)); - - while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) - break; - } - - if(!tmp_char) - list_append(char_list, name); - else - xfree(name); - list_iterator_reset(itr); - } - i++; - start = i; - } - i++; - } - if((i-start) > 0) { - name = xmalloc((i-start)+1); - memcpy(name, names+start, (i-start)); - while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) - break; - } - - if(!tmp_char) - list_append(char_list, name); - else - xfree(name); - } - } - list_iterator_destroy(itr); -} - extern int notice_thread_init() { pthread_attr_t attr; @@ -353,18 +304,21 @@ extern acct_user_rec_t *sacctmgr_find_user(char *name) { acct_user_rec_t *user = NULL; acct_user_cond_t user_cond; + acct_association_cond_t assoc_cond; List user_list = NULL; if(!name) return NULL; memset(&user_cond, 0, sizeof(acct_user_cond_t)); - 
user_cond.user_list = list_create(NULL); - list_append(user_cond.user_list, name); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.user_list = list_create(NULL); + list_append(assoc_cond.user_list, name); + user_cond.assoc_cond = &assoc_cond; user_list = acct_storage_g_get_users(db_conn, &user_cond); - list_destroy(user_cond.user_list); + list_destroy(assoc_cond.user_list); if(user_list) user = list_pop(user_list); @@ -378,18 +332,21 @@ extern acct_account_rec_t *sacctmgr_find_account(char *name) { acct_account_rec_t *account = NULL; acct_account_cond_t account_cond; + acct_association_cond_t assoc_cond; List account_list = NULL; if(!name) return NULL; memset(&account_cond, 0, sizeof(acct_account_cond_t)); - account_cond.acct_list = list_create(NULL); - list_append(account_cond.acct_list, name); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.acct_list = list_create(NULL); + list_append(assoc_cond.acct_list, name); + account_cond.assoc_cond = &assoc_cond; account_list = acct_storage_g_get_accounts(db_conn, &account_cond); - list_destroy(account_cond.acct_list); + list_destroy(assoc_cond.acct_list); if(account_list) account = list_pop(account_list); @@ -480,6 +437,27 @@ extern acct_association_rec_t *sacctmgr_find_account_base_assoc_from_list( return assoc; } + +extern acct_qos_rec_t *sacctmgr_find_qos_from_list( + List qos_list, char *name) +{ + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + + if(!name || !qos_list) + return NULL; + + itr = list_iterator_create(qos_list); + while((qos = list_next(itr))) { + if(!strcasecmp(name, qos->name)) + break; + } + list_iterator_destroy(itr); + + return qos; + +} + extern acct_user_rec_t *sacctmgr_find_user_from_list( List user_list, char *name) { @@ -562,61 +540,198 @@ extern int get_uint(char *in_value, uint32_t *out_value, char *type) return SLURM_SUCCESS; } -extern void sacctmgr_print_coord_list(type_t type, print_field_t *field, - List value) +extern int 
addto_qos_char_list(List char_list, List qos_list, char *names, + int option) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + uint32_t id=0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + if(!qos_list || !list_count(qos_list)) { + debug2("No real qos_list"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + + id = str_2_acct_qos(qos_list, name); + xfree(name); + if(id == NO_VAL) + goto bad; + + if(option) { + name = xstrdup_printf( + "%c%u", option, id); + } else + name = xstrdup_printf("%u", id); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + list_iterator_reset(itr); + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + bad: + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. 
It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + id = str_2_acct_qos(qos_list, name); + xfree(name); + if(id == NO_VAL) + goto end_it; + + if(option) { + name = xstrdup_printf( + "%c%u", option, id); + } else + name = xstrdup_printf("%u", id); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } +end_it: + list_iterator_destroy(itr); + return count; +} + +extern void sacctmgr_print_coord_list(print_field_t *field, List value) { ListIterator itr = NULL; char *print_this = NULL; acct_coord_rec_t *object = NULL; - switch(type) { - case SLURM_PRINT_HEADLINE: + if(!value || !list_count(value)) { if(print_fields_parsable_print) - printf("%s|", field->name); + print_this = xstrdup(""); else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - if(!value || !list_count(value)) { - if(print_fields_parsable_print) - print_this = xstrdup(""); - else - print_this = xstrdup(" "); - } else { - list_sort(value, (ListCmpF)sort_coord_list); - itr = list_iterator_create(value); - while((object = list_next(itr))) { - if(print_this) - xstrfmtcat(print_this, ",%s", - object->name); - else - print_this = xstrdup(object->name); - } - list_iterator_destroy(itr); + print_this = xstrdup(" "); + } else { + list_sort(value, (ListCmpF)sort_coord_list); + itr = list_iterator_create(value); + while((object = list_next(itr))) { + if(print_this) + xstrfmtcat(print_this, ",%s", + object->name); + else + print_this = xstrdup(object->name); } + list_iterator_destroy(itr); + } + + if(print_fields_parsable_print) + printf("%s|", print_this); + else 
{ + if(strlen(print_this) > field->len) + print_this[field->len-1] = '+'; + + printf("%-*.*s ", field->len, field->len, print_this); + } + xfree(print_this); +} - if(print_fields_parsable_print) - printf("%s|", print_this); - else { - if(strlen(print_this) > field->len) - print_this[field->len-1] = '+'; - - printf("%-*.*s ", field->len, field->len, print_this); - } - xfree(print_this); - break; - default: - if(print_fields_parsable_print) - printf("%s|", "n/a"); - else - printf("%-*s ", field->len, "n/a"); - break; +extern void sacctmgr_print_qos_list(print_field_t *field, List qos_list, + List value) +{ + char *print_this = NULL; + + print_this = get_qos_complete_str(qos_list, value); + + if(print_fields_parsable_print) + printf("%s|", print_this); + else { + if(strlen(print_this) > field->len) + print_this[field->len-1] = '+'; + + printf("%-*.*s ", field->len, field->len, print_this); + } + xfree(print_this); +} + +extern char *get_qos_complete_str(List qos_list, List num_qos_list) +{ + List temp_list = NULL; + char *temp_char = NULL; + char *print_this = NULL; + ListIterator itr = NULL; + + if(!qos_list || !list_count(qos_list) + || !num_qos_list || !list_count(num_qos_list)) + return xstrdup("normal"); + + temp_list = list_create(NULL); + + itr = list_iterator_create(num_qos_list); + while((temp_char = list_next(itr))) { + temp_char = acct_qos_str(qos_list, atoi(temp_char)); + if(temp_char) + list_append(temp_list, temp_char); } + list_iterator_destroy(itr); + list_sort(temp_list, (ListCmpF)sort_char_list); + itr = list_iterator_create(temp_list); + while((temp_char = list_next(itr))) { + if(print_this) + xstrfmtcat(print_this, ",%s", temp_char); + else + print_this = xstrdup(temp_char); + } + list_iterator_destroy(itr); + list_destroy(temp_list); + + if(!print_this) + return xstrdup("normal"); + + return print_this; } extern int sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b) @@ -630,3 +745,16 @@ extern int 
sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b) return 0; } + +extern int sort_char_list(char *name_a, char *name_b) +{ + int diff = strcmp(name_a, name_b); + + if (diff < 0) + return -1; + else if (diff > 0) + return 1; + + return 0; +} + diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index 2f949f63f..4e3b82edf 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * cluster_functions.c - functions dealing with clusters in the + * file_functions.c - functions dealing with files that are generated in the * accounting system. ***************************************************************************** * Copyright (C) 2008 Lawrence Livermore National Security. @@ -52,7 +52,7 @@ typedef struct { char *name; char *org; char *part; - acct_qos_level_t qos; + List qos_list; } sacctmgr_file_opts_t; enum { @@ -71,7 +71,6 @@ enum { PRINT_NAME, PRINT_ORG, PRINT_QOS, - PRINT_QOS_GOLD, PRINT_QOS_RAW, PRINT_PID, PRINT_PARENT, @@ -85,6 +84,8 @@ typedef enum { MOD_USER } sacctmgr_mod_type_t; +static List qos_list = NULL; + static int _strip_continuation(char *buf, int len) { char *ptr; @@ -204,13 +205,12 @@ static sacctmgr_file_opts_t *_parse_options(char *options) sacctmgr_file_opts_t *file_opts = xmalloc(sizeof(sacctmgr_file_opts_t)); char *option = NULL; char quote_c = '\0'; - + file_opts->fairshare = 1; file_opts->max_cpu_secs_per_job = INFINITE; file_opts->max_jobs = INFINITE; file_opts->max_nodes_per_job = INFINITE; file_opts->max_wall_duration_per_job = INFINITE; - file_opts->qos = ACCT_QOS_NORMAL; file_opts->admin = ACCT_ADMIN_NONE; while(options[i]) { @@ -247,73 +247,101 @@ static sacctmgr_file_opts_t *_parse_options(char *options) option = strip_quotes(sub+end, NULL); if(!end) { if(file_opts->name) { - printf(" Bad format on %s: " + exit_code=1; + fprintf(stderr, " Bad format on %s: " "End your option 
with " "an '=' sign\n", sub); _destroy_sacctmgr_file_opts(file_opts); break; } file_opts->name = xstrdup(option); - } else if (strncasecmp (sub, "AdminLevel", 2) == 0) { + } else if (!strncasecmp (sub, "AdminLevel", 2)) { file_opts->admin = str_2_acct_admin_level(option); - } else if (strncasecmp (sub, "Coordinator", 2) == 0) { + } else if (!strncasecmp (sub, "Coordinator", 2)) { if(!file_opts->coord_list) file_opts->coord_list = list_create(slurm_destroy_char); - addto_char_list(file_opts->coord_list, option); - } else if (strncasecmp (sub, "DefaultAccount", 3) == 0) { + slurm_addto_char_list(file_opts->coord_list, option); + } else if (!strncasecmp (sub, "DefaultAccount", 3)) { file_opts->def_acct = xstrdup(option); - } else if (strncasecmp (sub, "Description", 3) == 0) { + } else if (!strncasecmp (sub, "Description", 3)) { file_opts->desc = xstrdup(option); - } else if (strncasecmp (sub, "FairShare", 1) == 0) { + } else if (!strncasecmp (sub, "FairShare", 1)) { if (get_uint(option, &file_opts->fairshare, "FairShare") != SLURM_SUCCESS) { - printf(" Bad FairShare value: %s\n", option); + exit_code=1; + fprintf(stderr, + " Bad FairShare value: %s\n", option); _destroy_sacctmgr_file_opts(file_opts); break; } - } else if (strncasecmp (sub, "MaxCPUSec", 4) == 0 - || strncasecmp (sub, "MaxProcSec", 4) == 0) { + } else if (!strncasecmp (sub, "MaxCPUSec", 4) + || !strncasecmp (sub, "MaxProcSec", 4)) { if (get_uint(option, &file_opts->max_cpu_secs_per_job, "MaxCPUSec") != SLURM_SUCCESS) { - printf(" Bad MaxCPUSec value: %s\n", option); + exit_code=1; + fprintf(stderr, + " Bad MaxCPUSec value: %s\n", option); _destroy_sacctmgr_file_opts(file_opts); break; } - } else if (strncasecmp (sub, "MaxJobs", 4) == 0) { + } else if (!strncasecmp (sub, "MaxJobs", 4)) { if (get_uint(option, &file_opts->max_jobs, "MaxJobs") != SLURM_SUCCESS) { - printf(" Bad MaxJobs value: %s\n", option); + exit_code=1; + fprintf(stderr, + " Bad MaxJobs value: %s\n", option); 
_destroy_sacctmgr_file_opts(file_opts); break; } - } else if (strncasecmp (sub, "MaxNodes", 4) == 0) { + } else if (!strncasecmp (sub, "MaxNodes", 4)) { if (get_uint(option, &file_opts->max_nodes_per_job, "MaxNodes") != SLURM_SUCCESS) { - printf(" Bad MaxNodes value: %s\n", option); + exit_code=1; + fprintf(stderr, + " Bad MaxNodes value: %s\n", option); _destroy_sacctmgr_file_opts(file_opts); break; } - } else if (strncasecmp (sub, "MaxWall", 4) == 0) { + } else if (!strncasecmp (sub, "MaxWall", 4)) { mins = time_str2mins(option); if (mins >= 0) { file_opts->max_wall_duration_per_job = (uint32_t) mins; - } else if (strcmp(option, "-1") == 0) { + } else if (strcmp(option, "-1")) { file_opts->max_wall_duration_per_job = INFINITE; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", option); _destroy_sacctmgr_file_opts(file_opts); break; } - } else if (strncasecmp (sub, "Organization", 1) == 0) { + } else if (!strncasecmp (sub, "Organization", 1)) { file_opts->org = xstrdup(option); - } else if (strncasecmp (sub, "QosLevel", 1) == 0 - || strncasecmp (sub, "Expedite", 1) == 0) { - file_opts->qos = str_2_acct_qos(option); + } else if (!strncasecmp (sub, "QosLevel", 1) + || !strncasecmp (sub, "Expedite", 1)) { + int option2 = 0; + if(!file_opts->qos_list) { + file_opts->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + if(end > 2 && sub[end-1] == '=' + && (sub[end-2] == '+' + || sub[end-2] == '-')) + option2 = (int)sub[end-2]; + + addto_qos_char_list(file_opts->qos_list, qos_list, + option, option2); } else { - printf(" Unknown option: %s\n", sub); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", sub); } xfree(sub); @@ -330,7 +358,8 @@ static sacctmgr_file_opts_t *_parse_options(char *options) xfree(option); if(!file_opts->name) { - printf(" error: No name given\n"); + exit_code=1; + fprintf(stderr, " No name 
given\n"); _destroy_sacctmgr_file_opts(file_opts); } return file_opts; @@ -417,21 +446,16 @@ static List _set_up_print_fields(List format_list) field->name = xstrdup("Org"); field->len = 20; field->print_routine = print_fields_str; - } else if(!strncasecmp("QOSGOLD", object, 4)) { - field->type = PRINT_QOS_GOLD; - field->name = xstrdup("QOS_GOLD"); - field->len = 7; - field->print_routine = print_fields_uint; } else if(!strncasecmp("QOSRAW", object, 4)) { field->type = PRINT_QOS_RAW; field->name = xstrdup("QOS_RAW"); field->len = 7; - field->print_routine = print_fields_uint; + field->print_routine = print_fields_char_list; } else if(!strncasecmp("QOS", object, 1)) { field->type = PRINT_QOS; field->name = xstrdup("QOS"); field->len = 9; - field->print_routine = print_fields_str; + field->print_routine = sacctmgr_print_qos_list; } else if(!strncasecmp("Parent", object, 4)) { field->type = PRINT_PARENT; field->name = xstrdup("Parent"); @@ -448,7 +472,8 @@ static List _set_up_print_fields(List format_list) field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -472,10 +497,10 @@ static int _print_out_assoc(List assoc_list, bool user) format_list = list_create(slurm_destroy_char); if(user) - addto_char_list(format_list, + slurm_addto_char_list(format_list, "User,Account,F,MaxC,MaxJ,MaxN,MaxW"); else - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Account,Parent,F,MaxC,MaxJ,MaxN,MaxW"); print_fields_list = _set_up_print_fields(format_list); @@ -489,41 +514,41 @@ static int _print_out_assoc(List assoc_list, bool user) while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->acct); break; case PRINT_FAIRSHARE: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, 
assoc->fairshare); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_cpu_secs_per_job); break; case PRINT_MAXJ: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_jobs); break; case PRINT_MAXN: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_wall_duration_per_job); break; case PRINT_PARENT: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->parent_acct); break; case PRINT_PART: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->partition); break; case PRINT_USER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->user); break; default: @@ -656,15 +681,16 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, char *desc = NULL, *org = NULL; acct_account_rec_t mod_acct; acct_account_cond_t acct_cond; + acct_association_cond_t assoc_cond; memset(&mod_acct, 0, sizeof(acct_account_rec_t)); memset(&acct_cond, 0, sizeof(acct_account_cond_t)); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); if(file_opts->desc) desc = xstrdup(file_opts->desc); - else - desc = xstrdup(file_opts->name); - if(strcmp(desc, acct->description)) { + + if(desc && strcmp(desc, acct->description)) { printf(" Changed description for account " "'%s' from '%s' to '%s'\n", acct->name, @@ -672,15 +698,13 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, desc); mod_acct.description = desc; changed = 1; - } + } else + xfree(desc); if(file_opts->org) org = xstrdup(file_opts->org); - else if(strcmp(parent, "root")) - org = xstrdup(parent); - else - org = xstrdup(file_opts->name); - if(strcmp(org, acct->organization)) { + + if(org && strcmp(org, acct->organization)) { printf(" Changed organization for account '%s' " "from '%s' to '%s'\n", acct->name, @@ 
-688,35 +712,74 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, org); mod_acct.organization = org; changed = 1; - } - - if(acct->qos != file_opts->qos) { - printf(" Changed QOS for account '%s' " - "from '%s' to '%s'\n", - acct->name, - acct_qos_str(acct->qos), - acct_qos_str(file_opts->qos)); - mod_acct.qos = file_opts->qos; - changed = 1; + } else + xfree(org); + + if(acct->qos_list && list_count(acct->qos_list) + && file_opts->qos_list && list_count(file_opts->qos_list)) { + ListIterator now_qos_itr = list_iterator_create(acct->qos_list), + new_qos_itr = list_iterator_create(file_opts->qos_list); + char *now_qos = NULL, *new_qos = NULL; + + if(!mod_acct.qos_list) + mod_acct.qos_list = list_create(slurm_destroy_char); + while((new_qos = list_next(new_qos_itr))) { + while((now_qos = list_next(now_qos_itr))) { + if(!strcmp(new_qos, now_qos)) + break; + } + list_iterator_reset(now_qos_itr); + if(!now_qos) + list_append(mod_acct.qos_list, + xstrdup(new_qos)); + } + list_iterator_destroy(new_qos_itr); + list_iterator_destroy(now_qos_itr); + if(mod_acct.qos_list && list_count(mod_acct.qos_list)) + new_qos = get_qos_complete_str(qos_list, + mod_acct.qos_list); + if(new_qos) { + printf(" Adding QOS for account '%s' '%s'\n", + acct->name, + new_qos); + xfree(new_qos); + changed = 1; + } else { + list_destroy(mod_acct.qos_list); + mod_acct.qos_list = NULL; + } + } else if(file_opts->qos_list && list_count(file_opts->qos_list)) { + char *new_qos = get_qos_complete_str(qos_list, + file_opts->qos_list); + + if(new_qos) { + printf(" Adding QOS for account '%s' '%s'\n", + acct->name, + new_qos); + xfree(new_qos); + mod_acct.qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; + changed = 1; + } } if(changed) { List ret_list = NULL; - acct_cond.acct_list = - list_create(NULL); - - list_push(acct_cond.acct_list, - acct->name); - + assoc_cond.acct_list = list_create(NULL); + list_append(assoc_cond.acct_list, acct->name); + acct_cond.assoc_cond = &assoc_cond; + 
notice_thread_init(); ret_list = acct_storage_g_modify_accounts(db_conn, my_uid, &acct_cond, &mod_acct); notice_thread_fini(); - - list_destroy(acct_cond.acct_list); + list_destroy(assoc_cond.acct_list); + + if(mod_acct.qos_list) + list_destroy(mod_acct.qos_list); /* if(ret_list && list_count(ret_list)) { */ /* char *object = NULL; */ @@ -748,19 +811,20 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, acct_user_rec_t mod_user; acct_user_cond_t user_cond; List ret_list = NULL; + acct_association_cond_t assoc_cond; memset(&mod_user, 0, sizeof(acct_user_rec_t)); memset(&user_cond, 0, sizeof(acct_user_cond_t)); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); - user_cond.user_list = list_create(NULL); - list_push(user_cond.user_list, user->name); + assoc_cond.user_list = list_create(NULL); + list_append(assoc_cond.user_list, user->name); + user_cond.assoc_cond = &assoc_cond; if(file_opts->def_acct) def_acct = xstrdup(file_opts->def_acct); - else - def_acct = xstrdup(parent); - if(strcmp(def_acct, user->default_acct)) { + if(def_acct && strcmp(def_acct, user->default_acct)) { printf(" Changed User '%s' " "default account '%s' -> '%s'\n", user->name, @@ -768,16 +832,54 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, def_acct); mod_user.default_acct = def_acct; changed = 1; - } + } else + xfree(def_acct); - if(user->qos != file_opts->qos) { - printf(" Changed User '%s' " - "QOS '%s' -> '%s'\n", - user->name, - acct_qos_str(user->qos), - acct_qos_str(file_opts->qos)); - mod_user.qos = file_opts->qos; - changed = 1; + if(user->qos_list && list_count(user->qos_list) + && file_opts->qos_list && list_count(file_opts->qos_list)) { + ListIterator now_qos_itr = list_iterator_create(user->qos_list), + new_qos_itr = list_iterator_create(file_opts->qos_list); + char *now_qos = NULL, *new_qos = NULL; + + if(!mod_user.qos_list) + mod_user.qos_list = list_create(slurm_destroy_char); + while((new_qos = list_next(new_qos_itr))) { + while((now_qos = 
list_next(now_qos_itr))) { + if(!strcmp(new_qos, now_qos)) + break; + } + list_iterator_reset(now_qos_itr); + if(!now_qos) + list_append(mod_user.qos_list, + xstrdup(new_qos)); + } + list_iterator_destroy(new_qos_itr); + list_iterator_destroy(now_qos_itr); + if(mod_user.qos_list && list_count(mod_user.qos_list)) + new_qos = get_qos_complete_str(qos_list, + mod_user.qos_list); + if(new_qos) { + printf(" Adding QOS for user '%s' '%s'\n", + user->name, + new_qos); + xfree(new_qos); + changed = 1; + } else + list_destroy(mod_user.qos_list); + + } else if(file_opts->qos_list && list_count(file_opts->qos_list)) { + char *new_qos = get_qos_complete_str(qos_list, + file_opts->qos_list); + + if(new_qos) { + printf(" Adding QOS for user '%s' '%s'\n", + user->name, + new_qos); + xfree(new_qos); + mod_user.qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; + changed = 1; + } } if(user->admin_level != file_opts->admin) { @@ -799,6 +901,9 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, &user_cond, &mod_user); notice_thread_fini(); + + if(mod_user.qos_list) + list_destroy(mod_user.qos_list); /* if(ret_list && list_count(ret_list)) { */ /* char *object = NULL; */ @@ -815,6 +920,7 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, set = 1; } } + xfree(def_acct); if((!user->coord_accts || !list_count(user->coord_accts)) && (file_opts->coord_list @@ -892,7 +998,7 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, } list_destroy(add_list); } - list_destroy(user_cond.user_list); + list_destroy(assoc_cond.user_list); return set; } @@ -1070,16 +1176,29 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, line = xstrdup_printf( "User - %s", sacctmgr_assoc->sort_name); if(user_rec) { - xstrfmtcat(line, ":DefaultAccount=%s", + xstrfmtcat(line, ":DefaultAccount='%s'", user_rec->default_acct); if(user_rec->admin_level > ACCT_ADMIN_NONE) - xstrfmtcat(line, ":AdminLevel=%s", + xstrfmtcat(line, ":AdminLevel='%s'", acct_admin_level_str( user_rec-> 
admin_level)); - if(user_rec->qos > ACCT_QOS_NORMAL) - xstrfmtcat(line, ":QOS=%s", - acct_qos_str(user_rec->qos)); + if(user_rec->qos_list + && list_count(user_rec->qos_list)) { + char *temp_char = NULL; + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } + temp_char = get_qos_complete_str( + qos_list, user_rec->qos_list); + xstrfmtcat(line, ":QOS='%s'", + temp_char); + xfree(temp_char); + } + if(user_rec->coord_accts && list_count(user_rec->coord_accts)) { ListIterator itr2 = NULL; @@ -1099,7 +1218,7 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, xstrfmtcat( line, ":Coordinator" - "=%s", + "='%s", coord->name); first_coord = 0; } else { @@ -1107,6 +1226,8 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, coord->name); } } + if(!first_coord) + xstrcat(line, "'"); list_iterator_destroy(itr2); } } @@ -1120,9 +1241,15 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, acct_rec->description); xstrfmtcat(line, ":Organization='%s'", acct_rec->organization); - if(acct_rec->qos > ACCT_QOS_NORMAL) - xstrfmtcat(line, ":QOS=%s", - acct_qos_str(acct_rec->qos)); + if(acct_rec->qos_list) { + char *temp_char = get_qos_complete_str( + qos_list, acct_rec->qos_list); + if(temp_char) { + xstrfmtcat(line, ":QOS='%s'", + temp_char); + xfree(temp_char); + } + } } } if(sacctmgr_assoc->assoc->partition) @@ -1153,7 +1280,8 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, if(fprintf(fd, "%s\n", line) < 0) { - error("Can't write to file"); + exit_code=1; + fprintf(stderr, " Can't write to file"); return SLURM_ERROR; } info("%s", line); @@ -1242,7 +1370,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) fd = fopen(argv[0], "r"); if (fd == NULL) { - printf(" error: Unable to read \"%s\": %m\n", argv[0]); + exit_code=1; + fprintf(stderr, " Unable to read \"%s\": %m\n", argv[0]); return; } @@ -1253,13 +1382,12 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) user_cond.with_coords = 1; 
curr_user_list = acct_storage_g_get_users(db_conn, &user_cond); - /* This will be freed in their local counter parts */ - acct_list = list_create(NULL); - acct_assoc_list = list_create(NULL); - user_list = list_create(NULL); - user_assoc_list = list_create(NULL); - /* These are new info so they need to be freed here */ + acct_list = list_create(destroy_acct_account_rec); + acct_assoc_list = list_create(destroy_acct_association_rec); + user_list = list_create(destroy_acct_user_rec); + user_assoc_list = list_create(destroy_acct_association_rec); + mod_acct_list = list_create(destroy_acct_account_rec); mod_user_list = list_create(destroy_acct_user_rec); mod_assoc_list = list_create(destroy_acct_association_rec); @@ -1290,16 +1418,19 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) } } if(!object[0]) { - printf(" error: Misformatted line(%d): %s\n", lc, line); + exit_code=1; + fprintf(stderr, " Misformatted line(%d): %s\n", + lc, line); rc = SLURM_ERROR; break; } while(line[start] != ' ' && start<len) start++; if(start>=len) { - printf(" error: Nothing after object " - "name '%s'. line(%d)\n", - object, lc); + exit_code=1; + fprintf(stderr, " Nothing after object " + "name '%s'. 
line(%d)\n", + object, lc); rc = SLURM_ERROR; break; @@ -1311,7 +1442,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) acct_association_cond_t assoc_cond; if(cluster_name) { - printf(" You can only add one cluster " + exit_code=1; + fprintf(stderr, " You can only add one cluster " "at a time.\n"); rc = SLURM_ERROR; break; @@ -1320,7 +1452,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts = _parse_options(line+start); if(!file_opts) { - printf(" error: Problem with line(%d)\n", lc); + exit_code=1; + fprintf(stderr, + " error: Problem with line(%d)\n", lc); rc = SLURM_ERROR; break; } @@ -1352,8 +1486,11 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) list_destroy(cluster_list); if(rc != SLURM_SUCCESS) { - printf(" Problem adding machine\n"); + exit_code=1; + fprintf(stderr, + " Problem adding machine\n"); rc = SLURM_ERROR; + _destroy_sacctmgr_file_opts(file_opts); break; } set = 1; @@ -1371,16 +1508,18 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) list_destroy(assoc_cond.cluster_list); if(!curr_assoc_list) { - printf(" Problem getting associations " - "for this cluster\n"); + exit_code=1; + fprintf(stderr, " Problem getting associations " + "for this cluster\n"); rc = SLURM_ERROR; break; } //info("got %d assocs", list_count(curr_assoc_list)); continue; } else if(!cluster_name) { - printf(" error: You need to specify a cluster name " - "first with 'Cluster - $NAME' in your file\n"); + exit_code=1; + fprintf(stderr, " You need to specify a cluster name " + "first with 'Cluster - $NAME' in your file\n"); break; } @@ -1392,8 +1531,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) i++; if(i >= len) { - printf(" error: No parent name " - "given line(%d)\n", + exit_code=1; + fprintf(stderr, " No parent name " + "given line(%d)\n", lc); rc = SLURM_ERROR; break; @@ -1404,7 +1544,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) curr_assoc_list, parent, cluster_name) && 
!sacctmgr_find_account_base_assoc_from_list( acct_assoc_list, parent, cluster_name)) { - printf(" error: line(%d) You need to add " + exit_code=1; + fprintf(stderr, " line(%d) You need to add " "this parent (%s) as a child before " "you can add childern to it.\n", lc, parent); @@ -1423,7 +1564,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts = _parse_options(line+start); if(!file_opts) { - printf(" error: Problem with line(%d)\n", lc); + exit_code=1; + fprintf(stderr, " Problem with line(%d)\n", lc); rc = SLURM_ERROR; break; } @@ -1453,9 +1595,11 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) /* info("adding acct %s (%s) (%s)", */ /* acct->name, acct->description, */ /* acct->organization); */ - acct->qos = file_opts->qos; + acct->qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; list_append(acct_list, acct); - list_append(curr_acct_list, acct); + /* don't add anything to the + curr_acct_list */ assoc = xmalloc(sizeof(acct_association_rec_t)); assoc->acct = xstrdup(file_opts->name); @@ -1547,7 +1691,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts = _parse_options(line+start); if(!file_opts) { - printf(" error: Problem with line(%d)\n", lc); + exit_code=1; + fprintf(stderr, " Problem with line(%d)\n", lc); rc = SLURM_ERROR; break; } @@ -1564,22 +1709,26 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) else user->default_acct = xstrdup(parent); - user->qos = file_opts->qos; + user->qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; user->admin_level = file_opts->admin; if(file_opts->coord_list) { acct_user_cond_t user_cond; + acct_association_cond_t assoc_cond; ListIterator coord_itr = NULL; char *temp_char = NULL; acct_coord_rec_t *coord = NULL; memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.user_list = - list_create(NULL); - - list_push(user_cond.user_list, - user->name); + memset(&assoc_cond, 0, + sizeof(acct_association_cond_t)); + 
assoc_cond.user_list = + list_create(NULL); + list_append(assoc_cond.user_list, + user->name); + user_cond.assoc_cond = &assoc_cond; notice_thread_init(); rc = acct_storage_g_add_coord( @@ -1587,7 +1736,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts->coord_list, &user_cond); notice_thread_fini(); - list_destroy(user_cond.user_list); + list_destroy(assoc_cond.user_list); user->coord_accts = list_create( destroy_acct_coord_rec); coord_itr = list_iterator_create( @@ -1706,7 +1855,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) _destroy_sacctmgr_file_opts(file_opts); continue; } else { - printf(" error: Misformatted line(%d): %s\n", lc, line); + exit_code=1; + fprintf(stderr, + " Misformatted line(%d): %s\n", lc, line); rc = SLURM_ERROR; break; } @@ -1718,7 +1869,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) START_TIMER; if(rc == SLURM_SUCCESS && list_count(acct_list)) { printf("Accounts\n"); - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Name,Description,Organization,QOS"); print_fields_list = _set_up_print_fields(format_list); @@ -1733,23 +1884,21 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) switch(field->type) { case PRINT_DESC: field->print_routine( - SLURM_PRINT_VALUE, field, acct->description); break; case PRINT_NAME: field->print_routine( - SLURM_PRINT_VALUE, field, acct->name); break; case PRINT_ORG: field->print_routine( - SLURM_PRINT_VALUE, field, acct->organization); break; case PRINT_QOS: field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str(acct->qos)); + field, + qos_list, + acct->qos_list); break; default: break; @@ -1775,7 +1924,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) if(rc == SLURM_SUCCESS && list_count(user_list)) { printf("Users\n"); - addto_char_list(format_list, "Name,Default,QOS,Admin,Coord"); + slurm_addto_char_list(format_list, + "Name,Default,QOS,Admin,Coord"); print_fields_list = 
_set_up_print_fields(format_list); list_flush(format_list); @@ -1788,30 +1938,29 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) switch(field->type) { case PRINT_ADMIN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, acct_admin_level_str( user->admin_level)); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, user->coord_accts); break; case PRINT_DACCT: field->print_routine( - SLURM_PRINT_VALUE, field, + field, user->default_acct); break; case PRINT_NAME: field->print_routine( - SLURM_PRINT_VALUE, field, user->name); break; case PRINT_QOS: field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str(user->qos)); + field, + qos_list, + user->qos_list); break; default: break; @@ -1851,14 +2000,18 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) printf(" Nothing new added.\n"); } } else { - printf(" error: Problem with requests.\n"); + exit_code=1; + fprintf(stderr, " Problem with requests.\n"); } list_destroy(format_list); + list_destroy(mod_acct_list); list_destroy(acct_list); list_destroy(acct_assoc_list); + list_destroy(mod_user_list); list_destroy(user_list); list_destroy(user_assoc_list); + list_destroy(mod_assoc_list); if(curr_acct_list) list_destroy(curr_acct_list); if(curr_assoc_list) diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c new file mode 100644 index 000000000..95a466192 --- /dev/null +++ b/src/sacctmgr/qos_functions.c @@ -0,0 +1,414 @@ +/*****************************************************************************\ + * qos_functions.c - functions dealing with qoss in the + * accounting system. + ***************************************************************************** + * Copyright (C) 2002-2008 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Danny Auble <da@llnl.gov> + * LLNL-CODE-402394. + * + * This file is part of SLURM, a resource management program. 
+ * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/
+
+#include "src/sacctmgr/sacctmgr.h"
+
+/*
+ * _set_cond - fill in a QOS condition from command line arguments.
+ * IN/OUT start - index of the first argument to examine; set on return to
+ *	the index at which parsing stopped
+ * IN argc - count of arguments
+ * IN argv - list of arguments
+ * IN/OUT qos_cond - condition whose name, description and id lists are
+ *	created on demand and appended to
+ * IN format_list - not used by this function
+ * RET -1 if qos_cond is NULL, 1 if slurm_addto_char_list() returned
+ *	non-zero for at least one name, description or id, 0 otherwise.
+ *	An unrecognized argument sets the global exit_code and is skipped.
+ */
+static int _set_cond(int *start, int argc, char *argv[],
+		     acct_qos_cond_t *qos_cond,
+		     List format_list)
+{
+	int i;
+	int set = 0;
+	int end = 0;
+
+	if(!qos_cond) {
+		error("No qos_cond given");
+		return -1;
+	}
+
+	for (i=(*start); i<argc; i++) {
+		end = parse_option_end(argv[i]);
+		if (!strncasecmp (argv[i], "Set", 3)) {
+			/* Hand the "Set" keyword back to the caller and
+			 * stop.  The break is required: without it the
+			 * loop's i++ undoes the i-- and the same argument
+			 * is examined forever. */
+			i--;
+			break;
+		} else if (!strncasecmp (argv[i], "WithDeleted", 5)) {
+			qos_cond->with_deleted = 1;
+		} else if(!end && !strncasecmp(argv[i], "where", 5)) {
+			continue;
+		} else if(!end
+			  || !strncasecmp (argv[i], "Names", 1)
+			  || !strncasecmp (argv[i], "QOS", 1)) {
+			/* a bare word (no '=') is treated as a QOS name */
+			if(!qos_cond->name_list) {
+				qos_cond->name_list =
+					list_create(slurm_destroy_char);
+			}
+			if(slurm_addto_char_list(qos_cond->name_list,
+						 argv[i]+end))
+				set = 1;
+		} else if(!strncasecmp (argv[i], "Descriptions", 1)) {
+			if(!qos_cond->description_list) {
+				qos_cond->description_list =
+					list_create(slurm_destroy_char);
+			}
+			if(slurm_addto_char_list(qos_cond->description_list,
+						 argv[i]+end))
+				set = 1;
+		} else if(!strncasecmp (argv[i], "Ids", 1)) {
+			if(!qos_cond->id_list) {
+				qos_cond->id_list =
+					list_create(slurm_destroy_char);
+			}
+			if(slurm_addto_char_list(qos_cond->id_list,
+						 argv[i]+end))
+				set = 1;
+		} else {
+			exit_code=1;
+			fprintf(stderr, " Unknown condition: %s\n"
+				" Use keyword 'set' to modify "
+				"value\n", argv[i]);
+		}
+	}
+
+	(*start) = i;
+
+	return set;
+}
+
+/* static int _set_rec(int *start, int argc, char *argv[], */
+/* acct_qos_rec_t *qos) */
+/* { */
+/* int i; */
+/* int set = 0; */
+/* int end = 0; */
+
+/* for (i=(*start); i<argc; i++) { */
+/* end = parse_option_end(argv[i]); */
+/* if (!strncasecmp (argv[i], "Where", 5)) { */
+/* i--; */
+/* break; */
+/* } else if(!end && !strncasecmp(argv[i], "set", 3)) { */
+/* continue; */
+/* } else if(!end) { */
+/* printf(" Bad format on %s: End your option with " */
+/* "an '=' 
sign\n", argv[i]); */
+/* } else if (!strncasecmp (argv[i], "Description", 1)) { */
+/* if(!qos->description) */
+/* qos->description = */
+/* strip_quotes(argv[i]+end, NULL); */
+/* set = 1; */
+/* } else if (!strncasecmp (argv[i], "Name", 1)) { */
+/* if(!qos->name) */
+/* qos->name = strip_quotes(argv[i]+end, NULL); */
+/* set = 1; */
+/* } else { */
+/* printf(" Unknown option: %s\n" */
+/* " Use keyword 'where' to modify condition\n", */
+/* argv[i]); */
+/* } */
+/* } */
+
+/* (*start) = i; */
+
+/* return set; */
+/* } */
+
+/*
+ * sacctmgr_add_qos - add one or more QOS records to the accounting storage.
+ * IN argc - count of arguments
+ * IN argv - list of arguments: bare words or Names= give the QOS names,
+ *	Description= gives a description shared by all of them
+ * RET SLURM_SUCCESS or SLURM_ERROR.  Note that a missing name sets the
+ *	global exit_code but still returns SLURM_SUCCESS.
+ *
+ * Names already present in the list fetched from the database are skipped.
+ * When no description is given each QOS uses its own name as description.
+ */
+extern int sacctmgr_add_qos(int argc, char *argv[])
+{
+	int rc = SLURM_SUCCESS;
+	int i=0;
+	ListIterator itr = NULL;
+	acct_qos_rec_t *qos = NULL;
+	List name_list = list_create(slurm_destroy_char);
+	char *description = NULL;
+	char *name = NULL;
+	List qos_list = NULL;
+	List local_qos_list = NULL;
+	char *qos_str = NULL;
+
+	for (i=0; i<argc; i++) {
+		int end = parse_option_end(argv[i]);
+		if(!end || !strncasecmp (argv[i], "Names", 1)) {
+			slurm_addto_char_list(name_list, argv[i]+end);
+		} else if (!strncasecmp (argv[i], "Description", 1)) {
+			description = strip_quotes(argv[i]+end, NULL);
+		} else {
+			exit_code=1;
+			fprintf(stderr, " Unknown option: %s\n", argv[i]);
+		}
+	}
+
+	if(exit_code) {
+		list_destroy(name_list);
+		xfree(description);
+		return SLURM_ERROR;
+	} else if(!list_count(name_list)) {
+		list_destroy(name_list);
+		xfree(description);
+		exit_code=1;
+		fprintf(stderr, " Need name of qos to add.\n");
+		return SLURM_SUCCESS;
+	}
+
+
+	local_qos_list = acct_storage_g_get_qos(db_conn, NULL);
+
+	if(!local_qos_list) {
+		exit_code=1;
+		fprintf(stderr, " Problem getting qos's from database. "
+			"Contact your admin.\n");
+		list_destroy(name_list);
+		xfree(description);
+		return SLURM_ERROR;
+	}
+
+	qos_list = list_create(destroy_acct_qos_rec);
+
+	/* build a record for every requested name not already known */
+	itr = list_iterator_create(name_list);
+	while((name = list_next(itr))) {
+		qos = NULL;
+		if(!sacctmgr_find_qos_from_list(local_qos_list, name)) {
+			qos = xmalloc(sizeof(acct_qos_rec_t));
+			qos->name = xstrdup(name);
+			if(description)
+				qos->description = xstrdup(description);
+			else
+				qos->description = xstrdup(name);
+
+			xstrfmtcat(qos_str, " %s\n", name);
+			list_append(qos_list, qos);
+		}
+	}
+	list_iterator_destroy(itr);
+	list_destroy(local_qos_list);
+	list_destroy(name_list);
+
+	if(!list_count(qos_list)) {
+		printf(" Nothing new added.\n");
+		goto end_it;
+	}
+
+	if(qos_str) {
+		printf(" Adding QOS(s)\n%s", qos_str);
+		printf(" Settings\n");
+		if(description)
+			printf(" Description = %s\n", description);
+		else
+			printf(" Description = %s\n", "QOS Name");
+		xfree(qos_str);
+	}
+
+	/* qos_list is known to be non-empty here, so the add is always
+	 * attempted and notice_thread_init()/notice_thread_fini() stay
+	 * paired on every path. */
+	notice_thread_init();
+	rc = acct_storage_g_add_qos(db_conn, my_uid, qos_list);
+	notice_thread_fini();
+
+	if(rc == SLURM_SUCCESS) {
+		if(commit_check("Would you like to commit changes?")) {
+			acct_storage_g_commit(db_conn, 1);
+		} else {
+			printf(" Changes Discarded\n");
+			acct_storage_g_commit(db_conn, 0);
+		}
+	} else {
+		exit_code=1;
+		fprintf(stderr, " Problem adding QOS.\n");
+		rc = SLURM_ERROR;
+	}
+
+end_it:
+	list_destroy(qos_list);
+	xfree(description);
+
+	return rc;
+}
+
+/*
+ * sacctmgr_list_qos - print the QOS records matching the given conditions.
+ * IN argc - count of arguments
+ * IN argv - list of arguments, parsed by _set_cond()
+ * RET SLURM_SUCCESS, or SLURM_ERROR if an argument or field was not
+ *	understood or the storage query failed
+ *
+ * When format_list is empty after parsing, the single field "N" (Name)
+ * is printed.
+ */
+extern int sacctmgr_list_qos(int argc, char *argv[])
+{
+	int rc = SLURM_SUCCESS;
+	acct_qos_cond_t *qos_cond = xmalloc(sizeof(acct_qos_cond_t));
+	int i=0;
+	ListIterator itr = NULL;
+	ListIterator itr2 = NULL;
+	acct_qos_rec_t *qos = NULL;
+	char *object;
+	List qos_list = NULL;
+
+	print_field_t *field = NULL;
+
+	List format_list = list_create(slurm_destroy_char);
+	List print_fields_list; /* types are of print_field_t */
+
+	enum {
+		PRINT_DESC,
+		PRINT_ID,
+		PRINT_NAME
+	};
+
+	_set_cond(&i, argc, argv, qos_cond, format_list);
+
+	if(exit_code) {
+		/* qos_cond is an acct_qos_cond_t, so it must be released
+		 * with the QOS destructor, not the transaction one */
+		destroy_acct_qos_cond(qos_cond);
+		list_destroy(format_list);
+		return SLURM_ERROR;
+	} else if(!list_count(format_list)) {
+		slurm_addto_char_list(format_list, "N");
+	}
+
+	print_fields_list = list_create(destroy_print_field);
+
+	/* turn each requested field name into a print_field_t */
+	itr = list_iterator_create(format_list);
+	while((object = list_next(itr))) {
+		field = xmalloc(sizeof(print_field_t));
+		if(!strncasecmp("Description", object, 1)) {
+			field->type = PRINT_DESC;
+			field->name = xstrdup("Descr");
+			field->len = 20;
+			field->print_routine = print_fields_str;
+		} else if(!strncasecmp("ID", object, 1)) {
+			field->type = PRINT_ID;
+			field->name = xstrdup("ID");
+			field->len = 6;
+			field->print_routine = print_fields_uint;
+		} else if(!strncasecmp("Name", object, 1)) {
+			field->type = PRINT_NAME;
+			field->name = xstrdup("NAME");
+			field->len = 10;
+			field->print_routine = print_fields_str;
+		} else {
+			exit_code=1;
+			fprintf(stderr, "Unknown field '%s'\n", object);
+			xfree(field);
+			continue;
+		}
+		list_append(print_fields_list, field);
+	}
+	list_iterator_destroy(itr);
+	list_destroy(format_list);
+
+	if(exit_code) {
+		/* the condition is still owned here; do not leak it */
+		destroy_acct_qos_cond(qos_cond);
+		list_destroy(print_fields_list);
+		return SLURM_ERROR;
+	}
+	qos_list = acct_storage_g_get_qos(db_conn, qos_cond);
+	destroy_acct_qos_cond(qos_cond);
+
+	if(!qos_list) {
+		exit_code=1;
+		fprintf(stderr, " Problem with query.\n");
+		list_destroy(print_fields_list);
+		return SLURM_ERROR;
+	}
+	itr = list_iterator_create(qos_list);
+	itr2 = list_iterator_create(print_fields_list);
+	print_fields_header(print_fields_list);
+
+	/* one output line per QOS, one column per requested field */
+	while((qos = list_next(itr))) {
+		while((field = list_next(itr2))) {
+			switch(field->type) {
+			case PRINT_DESC:
+				field->print_routine(
+					field, qos->description);
+				break;
+			case PRINT_ID:
+				field->print_routine(
+					field, qos->id);
+				break;
+			case PRINT_NAME:
+				field->print_routine(
+					field, qos->name);
+				break;
+			default:
+				break;
+			}
+		}
+		list_iterator_reset(itr2);
+		printf("\n");
+	}
+	list_iterator_destroy(itr2);
+	list_iterator_destroy(itr);
+	list_destroy(qos_list);
+	list_destroy(print_fields_list);
+
+	return rc;
+}
+
+/*
+ * sacctmgr_delete_qos - remove the QOS records matching the given conditions.
+ * IN argc - count of arguments
+ * IN argv - list of arguments, parsed by _set_cond()
+ * RET SLURM_SUCCESS, or SLURM_ERROR if no condition was given or the
+ *	storage request returned no list
+ *
+ * The removed names are printed and the change is committed or rolled
+ * back depending on the answer to commit_check().
+ */
+extern int sacctmgr_delete_qos(int argc, char *argv[])
+{
+	int rc = SLURM_SUCCESS;
+	acct_qos_cond_t *qos_cond =
+		xmalloc(sizeof(acct_qos_cond_t));
+	int i=0;
+	List ret_list = NULL;
+	int set = 0;
+
+	/* refuse to run without a condition */
+	if(!(set = _set_cond(&i, argc, argv, qos_cond, NULL))) {
+		exit_code=1;
+		fprintf(stderr,
+			" No conditions given to remove, not executing.\n");
+		destroy_acct_qos_cond(qos_cond);
+		return SLURM_ERROR;
+	} else if(set == -1) {
+		destroy_acct_qos_cond(qos_cond);
+		return SLURM_ERROR;
+	}
+
+	notice_thread_init();
+	ret_list = acct_storage_g_remove_qos(db_conn, my_uid, qos_cond);
+	notice_thread_fini();
+	destroy_acct_qos_cond(qos_cond);
+
+	if(ret_list && list_count(ret_list)) {
+		char *object = NULL;
+		ListIterator itr = list_iterator_create(ret_list);
+		printf(" Deleting QOS(s)...\n");
+
+		while((object = list_next(itr))) {
+			printf(" %s\n", object);
+		}
+		list_iterator_destroy(itr);
+		if(commit_check("Would you like to commit changes?")) {
+			acct_storage_g_commit(db_conn, 1);
+		} else {
+			printf(" Changes Discarded\n");
+			acct_storage_g_commit(db_conn, 0);
+		}
+	} else if(ret_list) {
+		/* an empty list: the request worked but matched nothing */
+		printf(" Nothing deleted\n");
+	} else {
+		exit_code=1;
+		fprintf(stderr, " Error with request\n");
+		rc = SLURM_ERROR;
+	}
+
+	if(ret_list)
+		list_destroy(ret_list);
+
+	return rc;
+}
diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c
index cbdc03aa6..fe53cf9a0 100644
--- a/src/sacctmgr/sacctmgr.c
+++ b/src/sacctmgr/sacctmgr.c
@@ -51,6 +51,7 @@ int exit_flag; /* program to terminate if =1 */
 int input_words; /* number of words of input permitted */
 int one_liner; /* one record per line if =1 */
 int quiet_flag; /* quiet=1, verbose=-1, normal=0 */
+int verbosity; /* count of -v options */
 int rollback_flag; /* immediate execute=1, else = 0 */
 int with_assoc_flag = 0;
 void *db_conn = NULL;
@@ -71,6 +72,7 @@ main (int argc, char
*argv[]) int error_code = SLURM_SUCCESS, i, opt_char, input_field_count; char **input_fields; log_options_t opts = LOG_OPTS_STDERR_ONLY ; + int local_exit_code = 0; int option_index; static struct option long_options[] = { @@ -96,6 +98,7 @@ main (int argc, char *argv[]) exit_flag = 0; input_field_count = 0; quiet_flag = 0; + verbosity = 0; log_init("sacctmgr", opts, SYSLOG_FACILITY_DAEMON, NULL); if (getenv ("SACCTMGR_ALL")) @@ -139,6 +142,7 @@ main (int argc, char *argv[]) break; case (int)'v': quiet_flag = -1; + verbosity++; break; case (int)'V': _print_version(); @@ -163,6 +167,12 @@ main (int argc, char *argv[]) } } + if (verbosity) { + opts.stderr_level += verbosity; + opts.prefix_level = 1; + log_alter(opts, 0, NULL); + } + db_conn = acct_storage_g_get_connection(false, rollback_flag); my_uid = getuid(); @@ -176,8 +186,17 @@ main (int argc, char *argv[]) if (error_code || exit_flag) break; error_code = _get_command (&input_field_count, input_fields); + /* This is here so if someone made a mistake we allow + * them to fix it and let the process happen since there + * are checks for global exit_code we need to reset it. 
+ */ + if(exit_code) { + local_exit_code = exit_code; + exit_code = 0; + } } - + if(local_exit_code) + exit_code = local_exit_code; acct_storage_g_close_connection(&db_conn); slurm_acct_storage_fini(); exit(exit_code); @@ -454,23 +473,25 @@ static void _add_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; /* First identify the entity to add */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_add_user((argc - 1), &argv[1]); + if (strncasecmp (argv[0], "Account", 1) == 0) { + error_code = sacctmgr_add_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 2) == 0) { error_code = sacctmgr_add_cluster((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Coordinator", 2) == 0) { error_code = sacctmgr_add_coord((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 1) == 0) { - error_code = sacctmgr_add_account((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "QOS", 1) == 0) { + error_code = sacctmgr_add_qos((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_add_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in add command\n"); fprintf(stderr, "Input line must include, "); fprintf(stderr, "\"User\", \"Account\", \"Coordinator\", "); - fprintf(stderr, "or \"Cluster\"\n"); + fprintf(stderr, "\"Cluster\", or \"QOS\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -479,29 +500,35 @@ static void _add_it (int argc, char *argv[]) * _show_it - list the slurm configuration per the supplied arguments * IN argc - count of arguments * IN argv - list of arguments + * undocumented association options wopi and wopl + * without parent info and without parent limits */ static void _show_it (int argc, char *argv[]) { int error_code = SLURM_SUCCESS; /* First identify the entity to list */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_list_user((argc - 1), 
&argv[1]); - } else if (strncasecmp (argv[0], "Account", 2) == 0) { + if (strncasecmp (argv[0], "Account", 2) == 0) { error_code = sacctmgr_list_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Association", 2) == 0) { error_code = sacctmgr_list_association((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 1) == 0) { error_code = sacctmgr_list_cluster((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "QOS", 1) == 0) { + error_code = sacctmgr_list_qos((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "Transactions", 1) == 0) { + error_code = sacctmgr_list_txn((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_list_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in list command\n"); fprintf(stderr, "Input line must include "); fprintf(stderr, "\"User\", \"Account\", \"Association\", "); - fprintf(stderr, "or \"Cluster\"\n"); + fprintf(stderr, "\"Cluster\", or \"QOS\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -517,12 +544,12 @@ static void _modify_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; /* First identify the entity to modify */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_modify_user((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 1) == 0) { + if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_modify_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 1) == 0) { error_code = sacctmgr_modify_cluster((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_modify_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in modify command\n"); @@ -531,7 +558,7 @@ static void _modify_it (int argc, char *argv[]) fprintf(stderr, "or \"Cluster\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } 
} @@ -546,23 +573,25 @@ static void _delete_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; /* First identify the entity to delete */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_delete_user((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 1) == 0) { + if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_delete_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 2) == 0) { error_code = sacctmgr_delete_cluster((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Coordinator", 2) == 0) { error_code = sacctmgr_delete_coord((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "QOS", 2) == 0) { + error_code = sacctmgr_delete_qos((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_delete_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in delete command\n"); fprintf(stderr, "Input line must include "); fprintf(stderr, "\"User\", \"Account\", \"Coordinator\", "); - fprintf(stderr, "or \"Cluster\"\n"); + fprintf(stderr, "\"Cluster\", or \"QOS\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -611,7 +640,8 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ version display tool version number. \n\ !! Repeat the last command entered. \n\ \n\ - <ENTITY> may be \"cluster\", \"account\", \"user\", of \"coordinator\". \n\ + <ENTITY> may be \"account\", \"association\", \"cluster\", \n\ + \"coordinator\", \"qos\", \"transaction\", or \"user\". \n\ \n\ <SPECS> are different for each command entity pair. 
\n\ list account - Clusters=, Descriptions=, Format=, Names=, \n\ @@ -638,6 +668,16 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ (where options) Names= \n\ delete cluster - Names= \n\ \n\ + add coordinator - Accounts=, and Names= \n\ + delete coordinator - Accounts=, and Names= \n\ + \n\ + list qos - Descriptions=, Ids=, Names=, and WithDeleted \n\ + add qos - Description=, and Names= \n\ + delete qos - Descriptions=, Ids=, and Names= \n\ + \n\ + list transactions - Actor=, EndTime, \n\ + Format=, ID=, and Start= \n\ + \n\ list user - AdminLevel=, DefaultAccounts=, Format=, Names=,\n\ QosLevel=, and WithAssocs \n\ add user - Accounts=, AdminLevel=, Clusters=, \n\ @@ -653,8 +693,24 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ delete user - Accounts=, AdminLevel=, Clusters=, \n\ DefaultAccounts=, and Names= \n\ \n\ - add coordinator - Accounts=, and Names= \n\ - delete coordinator - Accounts=, and Names= \n\ + Format options are different for listing each entity pair. \n\ + \n\ + Account - Account, Cluster, CoordinatorList, \n\ + Description, Organization, QOS, QOSRAW \n\ + \n\ + Association - Account, Cluster, Fairshare, ID, LFT, \n\ + MaxCPUSecs, MaxJobs, MaxNodes, MaxWall, \n\ + ParentID, ParentName, Partition, RGT, User \n\ + \n\ + Cluster - Cluster, ControlHost, ControlPort, Fairshare \n\ + MaxCPUSecs, MaxJobs, MaxNodes, MaxWall \n\ + \n\ + QOS - Description, ID, Name \n\ + \n\ + Transactions - Action, Actor, ID, Info, TimeStamp, Where \n\ + \n\ + User - Account, AdminLevel, Cluster, CoordinatorList, \n\ + DefaultAccount, QOS, QOSRAW, User \n\ \n\ \n\ All commands entitys, and options are case-insensitive. 
\n\n"); diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h index 5a2329785..b935efa51 100644 --- a/src/sacctmgr/sacctmgr.h +++ b/src/sacctmgr/sacctmgr.h @@ -109,11 +109,13 @@ extern int sacctmgr_add_user(int argc, char *argv[]); extern int sacctmgr_add_account(int argc, char *argv[]); extern int sacctmgr_add_cluster(int argc, char *argv[]); extern int sacctmgr_add_coord(int argc, char *argv[]); +extern int sacctmgr_add_qos(int argc, char *argv[]); extern int sacctmgr_list_association(int argc, char *argv[]); extern int sacctmgr_list_user(int argc, char *argv[]); extern int sacctmgr_list_account(int argc, char *argv[]); extern int sacctmgr_list_cluster(int argc, char *argv[]); +extern int sacctmgr_list_qos(int argc, char *argv[]); extern int sacctmgr_modify_association(int argc, char *argv[]); extern int sacctmgr_modify_user(int argc, char *argv[]); @@ -125,6 +127,7 @@ extern int sacctmgr_delete_user(int argc, char *argv[]); extern int sacctmgr_delete_account(int argc, char *argv[]); extern int sacctmgr_delete_cluster(int argc, char *argv[]); extern int sacctmgr_delete_coord(int argc, char *argv[]); +extern int sacctmgr_delete_qos(int argc, char *argv[]); /* this has pointers to assoc_list so do not destroy assoc_list before * using the list returned from this function. 
@@ -137,15 +140,19 @@ extern int sacctmgr_dump_cluster(int argc, char *argv[]); extern void destroy_sacctmgr_assoc(void *object); extern int parse_option_end(char *option); extern char *strip_quotes(char *option, int *increased); -extern void addto_char_list(List char_list, char *names); extern int notice_thread_init(); extern int notice_thread_fini(); extern int commit_check(char *warning); extern int get_uint(char *in_value, uint32_t *out_value, char *type); -extern void sacctmgr_print_coord_list(type_t type, print_field_t *field, - List value); +extern int addto_qos_char_list(List char_list, List qos_list, char *names, + int option); +extern void sacctmgr_print_coord_list(print_field_t *field, List value); +extern void sacctmgr_print_qos_list(print_field_t *field, List qos_list, + List value); +extern char *get_qos_complete_str(List qos_list, List num_qos_list); extern int sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b); +extern int sort_char_list(char *name_a, char *name_b); /* you need to free the objects returned from these functions */ extern acct_association_rec_t *sacctmgr_find_association(char *user, @@ -168,6 +175,8 @@ extern acct_association_rec_t *sacctmgr_find_association_from_list( char *cluster, char *partition); extern acct_association_rec_t *sacctmgr_find_account_base_assoc_from_list( List assoc_list, char *account, char *cluster); +extern acct_qos_rec_t *sacctmgr_find_qos_from_list( + List qos_list, char *name); extern acct_user_rec_t *sacctmgr_find_user_from_list( List user_list, char *name); extern acct_account_rec_t *sacctmgr_find_account_from_list( @@ -184,4 +193,7 @@ extern int print_file_sacctmgr_assoc_list(FILE *fd, extern void load_sacctmgr_cfg_file (int argc, char *argv[]); +/* txn_functions.c */ +extern int sacctmgr_list_txn(int argc, char *argv[]); + #endif diff --git a/src/sacctmgr/txn_functions.c b/src/sacctmgr/txn_functions.c new file mode 100644 index 000000000..b98286b01 --- /dev/null +++ 
b/src/sacctmgr/txn_functions.c
@@ -0,0 +1,265 @@
+/*****************************************************************************\
+ *  txn_functions.c - functions dealing with transactions in the
+ *                       accounting system.
+ *****************************************************************************
+ *  Copyright (C) 2008 Lawrence Livermore National Security.
+ *  Copyright (C) 2002-2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Danny Auble <da@llnl.gov>
+ *  LLNL-CODE-402394.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version. If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+
+#include "src/sacctmgr/sacctmgr.h"
+#include "src/common/slurmdbd_defs.h"
+
+/*
+ * _set_cond - parse the command line conditions of a transaction query.
+ * IN/OUT start - index in argv to start at; on return the index parsing
+ *	stopped at (the loop runs up to argc without breaking out)
+ * IN argc, argv - arguments to parse.  An argument parse_option_end()
+ *	returns 0 for is taken as an id list, unless it is "where"
+ * OUT txn_cond - receives the id and actor lists and the start/end times
+ * OUT format_list - receives the Format= field names, may be NULL
+ * RET 1 if an id, actor or time condition was stored, 0 otherwise.
+ *	An unknown keyword sets exit_code and is reported on stderr.
+ */
+static int _set_cond(int *start, int argc, char *argv[],
+		     acct_txn_cond_t *txn_cond,
+		     List format_list)
+{
+	int i, end = 0;
+	int set = 0;
+
+	for (i=(*start); i<argc; i++) {
+		end = parse_option_end(argv[i]);
+		if(!end && !strncasecmp(argv[i], "where", 5)) {
+			continue;
+		} else if(!end
+			  || (!strncasecmp (argv[i], "Id", 1))
+			  || (!strncasecmp (argv[i], "Txn", 1))) {
+			if(!txn_cond->id_list)
+				txn_cond->id_list =
+					list_create(slurm_destroy_char);
+
+			if(slurm_addto_char_list(txn_cond->id_list,
+						 argv[i]+end))
+				set = 1;
+		} else if (!strncasecmp (argv[i], "Action", 4)) {
+			/* FIX ME! fill this in */
+/* 			if(!txn_cond->action_list) */
+/* 				txn_cond->action_list = */
+/* 					list_create(slurm_destroy_char); */
+
+/* 			if(slurm_addto_char_list(txn_cond->action_list, */
+/* 						 argv[i]+end)) */
+/* 				set = 1; */
+		} else if (!strncasecmp (argv[i], "Actors", 4)
+			   || !strncasecmp (argv[i], "User", 1)) {
+			if(!txn_cond->actor_list)
+				txn_cond->actor_list =
+					list_create(slurm_destroy_char);
+			if(slurm_addto_char_list(txn_cond->actor_list,
+						 argv[i]+end))
+				set = 1;
+		} else if (!strncasecmp (argv[i], "End", 1)) {
+			txn_cond->time_end = parse_time(argv[i]+end);
+			set = 1;
+		} else if (!strncasecmp (argv[i], "Format", 1)) {
+			if(format_list)
+				slurm_addto_char_list(format_list, argv[i]+end);
+		} else if (!strncasecmp (argv[i], "Start", 1)) {
+			txn_cond->time_start = parse_time(argv[i]+end);
+			set = 1;
+		} else {
+			exit_code=1;
+			fprintf(stderr, " Unknown condition: %s\n", argv[i]);
+		}
+	}
+	(*start) = i;
+
+	return set;
+}
+
+
+/*
+ * sacctmgr_list_txn - query the accounting storage for transactions and
+ *	print one row per transaction.
+ * IN argc, argv - conditions and Format= list, parsed by _set_cond()
+ * RET SLURM_SUCCESS, or SLURM_ERROR with exit_code set when a condition
+ *	or format field is unknown or the query returns no list
+ */
+extern int sacctmgr_list_txn(int argc, char *argv[])
+{
+	int rc = SLURM_SUCCESS;
+	acct_txn_cond_t *txn_cond = xmalloc(sizeof(acct_txn_cond_t));
+	List txn_list = NULL;
+	acct_txn_rec_t *txn = NULL;
+	int i=0;
+	ListIterator itr = NULL;
+	ListIterator itr2 = NULL;
+	char *object = NULL;
+
+	print_field_t *field = NULL;
+
+	List format_list = list_create(slurm_destroy_char);
+	List print_fields_list; /* types are of print_field_t */
+
+	enum {
+		PRINT_ACTION,
+		PRINT_ACTOR,
+		PRINT_ID,
+		PRINT_INFO,
+		PRINT_TS,
+		PRINT_WHERE
+	};
+
+	_set_cond(&i, argc, argv, txn_cond, format_list);
+
+	if(exit_code) {
+		destroy_acct_txn_cond(txn_cond);
+		list_destroy(format_list);
+		return SLURM_ERROR;
+	}
+
+	print_fields_list = list_create(destroy_print_field);
+
+	/* no Format= given, fall back to the default columns */
+	if(!list_count(format_list))
+		slurm_addto_char_list(format_list, "T,Action,Actor,Where,Info");
+
+	itr = list_iterator_create(format_list);
+	while((object = list_next(itr))) {
+		field = xmalloc(sizeof(print_field_t));
+		if(!strncasecmp("Action", object, 4)) {
+			field->type = PRINT_ACTION;
+			field->name = xstrdup("Action");
+			field->len = 20;
+			field->print_routine = print_fields_str;
+		} else if(!strncasecmp("Actor", object, 4)) {
+			field->type = PRINT_ACTOR;
+			field->name = xstrdup("Actor");
+			field->len = 10;
+			field->print_routine = print_fields_str;
+		} else if(!strncasecmp("ID", object, 2)) {
+			field->type = PRINT_ID;
+			field->name = xstrdup("ID");
+			field->len = 6;
+			field->print_routine = print_fields_uint;
+		} else if(!strncasecmp("Info", object, 2)) {
+			field->type = PRINT_INFO;
+			field->name = xstrdup("Info");
+			field->len = 20;
+			field->print_routine = print_fields_str;
+		} else if(!strncasecmp("TimeStamp", object, 1)) {
+			field->type = PRINT_TS;
+			field->name = xstrdup("Time");
+			field->len = 15;
+			field->print_routine = print_fields_date;
+		} else if(!strncasecmp("Where", object, 1)) {
+			field->type = PRINT_WHERE;
+			field->name = xstrdup("Where");
+			field->len = 20;
+			field->print_routine = print_fields_str;
+		} else {
+			exit_code=1;
+			fprintf(stderr, " Unknown field '%s'\n", object);
+			xfree(field);
+			continue;
+		}
+		list_append(print_fields_list, field);
+	}
+	list_iterator_destroy(itr);
+	list_destroy(format_list);
+
+	if(exit_code) {
+		/* txn_cond is only released after the query below, so
+		 * it has to be freed on this early return as well */
+		destroy_acct_txn_cond(txn_cond);
+		list_destroy(print_fields_list);
+		return SLURM_ERROR;
+	}
+
+	txn_list = acct_storage_g_get_txn(db_conn, txn_cond);
+	destroy_acct_txn_cond(txn_cond);
+
+	if(!txn_list) {
+		exit_code=1;
+		fprintf(stderr, " Problem with query.\n");
+		list_destroy(print_fields_list);
+		return SLURM_ERROR;
+	}
+	itr = list_iterator_create(txn_list);
+	itr2 = list_iterator_create(print_fields_list);
+	print_fields_header(print_fields_list);
+
+	while((txn = list_next(itr))) {
+		while((field = list_next(itr2))) {
+			switch(field->type) {
+			case PRINT_ACTION:
+				field->print_routine(
+					field,
+					slurmdbd_msg_type_2_str(txn->action));
+				break;
+			case PRINT_ACTOR:
+				field->print_routine(field,
+						     txn->actor_name);
+				break;
+			case PRINT_ID:
+				field->print_routine(field,
+						     txn->id);
+				break;
+			case PRINT_INFO:
+				field->print_routine(field,
+						     txn->set_info);
+				break;
+			case PRINT_TS:
+				field->print_routine(field,
+						     txn->timestamp);
+				break;
+			case PRINT_WHERE:
+				field->print_routine(field,
+						     txn->where_query);
+				break;
+			default:
+				break;
+			}
+		}
+		/* rewind the field iterator for the next transaction */
+		list_iterator_reset(itr2);
+		printf("\n");
+	}
+
+	list_iterator_destroy(itr2);
+	list_iterator_destroy(itr);
+	list_destroy(txn_list);
+	list_destroy(print_fields_list);
+	return rc;
+}
diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c
index e30becaf5..acaf69df7 100644
--- a/src/sacctmgr/user_functions.c
+++ b/src/sacctmgr/user_functions.c
@@ -46,60 +46,113 @@ static int _set_cond(int *start, int argc, char *argv[],
 	int u_set = 0;
 	int a_set = 0;
 	int end = 0;
+	List qos_list = NULL;
+
+	if(!user_cond) {
+		error("No user_cond given");
+		return -1;
+	}
+
+	if(!user_cond->assoc_cond) {
+		user_cond->assoc_cond =
+			xmalloc(sizeof(acct_association_cond_t));
+		user_cond->assoc_cond->fairshare = NO_VAL;
+		user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL;
+
user_cond->assoc_cond->max_jobs = NO_VAL; + user_cond->assoc_cond->max_nodes_per_job = NO_VAL; + user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; + } for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; - } else if (!end && strncasecmp (argv[i], "WithAssoc", 5) == 0) { + } else if (!end && !strncasecmp (argv[i], "WithAssoc", 5)) { user_cond->with_assocs = 1; - } else if (strncasecmp (argv[i], "WithCoordinators", 5) == 0) { + } else if (!strncasecmp (argv[i], "WithCoordinators", 5)) { user_cond->with_coords = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(user_cond->user_list, argv[i]); - addto_char_list(user_cond->assoc_cond->user_list, - argv[i]); - u_set = 1; - } else if (strncasecmp (argv[i], "Account", 2) == 0) { - addto_char_list(user_cond->assoc_cond->acct_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "AdminLevel", 2) == 0) { + } else if(!end + || !strncasecmp (argv[i], "Names", 1) + || !strncasecmp (argv[i], "Users", 1)) { + if(!user_cond->assoc_cond->user_list) { + user_cond->assoc_cond->user_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->user_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Account", 2)) { + if(!user_cond->assoc_cond->acct_list) { + user_cond->assoc_cond->acct_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->acct_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "AdminLevel", 2)) { user_cond->admin_level = str_2_acct_admin_level(argv[i]+end); u_set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(user_cond->assoc_cond->cluster_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "DefaultAccount", 1) == 0) { - 
addto_char_list(user_cond->def_acct_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + if(!user_cond->assoc_cond->cluster_list) { + user_cond->assoc_cond->cluster_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->cluster_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "DefaultAccount", 1)) { + if(!user_cond->def_acct_list) { + user_cond->def_acct_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(user_cond->def_acct_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0 - || strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(user_cond->user_list, argv[i]+end); - addto_char_list(user_cond->assoc_cond->user_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Partition", 3) == 0) { - addto_char_list(user_cond->assoc_cond->partition_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - user_cond->qos = str_2_acct_qos(argv[i]+end); + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Partition", 3)) { + if(!user_cond->assoc_cond->partition_list) { + user_cond->assoc_cond->partition_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->partition_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!user_cond->qos_list) { + user_cond->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + addto_qos_char_list(user_cond->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown condition: %s\n" - " Use keyword 'set' to modify value\n", 
argv[i]); + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" + " Use keyword 'set' to modify value\n", + argv[i]); } } + + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(a_set) { @@ -118,51 +171,54 @@ static int _set_rec(int *start, int argc, char *argv[], int u_set = 0; int a_set = 0; int end = 0; + List qos_list = NULL; for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i--; break; } else if(!end && !strncasecmp(argv[i], "set", 3)) { continue; } else if(!end) { - printf(" Bad format on %s: End your option with " - "an '=' sign\n", argv[i]); - } else if (strncasecmp (argv[i], "AdminLevel", 2) == 0) { + exit_code=1; + fprintf(stderr, + " Bad format on %s: End your option with " + "an '=' sign\n", argv[i]); + } else if (!strncasecmp (argv[i], "AdminLevel", 2)) { user->admin_level = str_2_acct_admin_level(argv[i]+end); u_set = 1; - } else if (strncasecmp (argv[i], "DefaultAccount", 1) == 0) { + } else if (!strncasecmp (argv[i], "DefaultAccount", 1)) { user->default_acct = strip_quotes(argv[i]+end, NULL); u_set = 1; - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if(!association) continue; if (get_uint(argv[i]+end, &association->fairshare, - "FairShare") == SLURM_SUCCESS) + "FairShare") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSec", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSec", 4)) { if(!association) continue; if (get_uint(argv[i]+end, - &association->max_cpu_secs_per_job, - "MaxCPUSec") == SLURM_SUCCESS) + &association->max_cpu_secs_per_job, + "MaxCPUSec") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if(!association) continue; if (get_uint(argv[i]+end, &association->max_jobs, "MaxJobs") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp 
(argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if(!association) continue; if (get_uint(argv[i]+end, &association->max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { if(!association) continue; mins = time_str2mins(argv[i]+end); @@ -171,18 +227,41 @@ static int _set_rec(int *start, int argc, char *argv[], = (uint32_t) mins; a_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - user->qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!user->qos_list) { + user->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + if(end > 2 && argv[i][end-1] == '=' + && (argv[i][end-2] == '+' + || argv[i][end-2] == '-')) + option = (int)argv[i][end-2]; + + addto_qos_char_list(user->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown option: %s\n" - " Use keyword 'where' to modify condition\n", - argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n" + " Use keyword 'where' to modify condition\n", + argv[i]); } } + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(u_set && a_set) @@ -207,7 +286,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) char *default_acct = NULL; acct_association_cond_t *assoc_cond = NULL; acct_association_cond_t query_assoc_cond; - acct_qos_level_t qos = ACCT_QOS_NOTSET; + List add_qos_list = NULL; + List qos_list = NULL; acct_admin_level_t admin_level = ACCT_ADMIN_NOTSET; char *name = NULL, *account = NULL, *cluster = NULL, *partition = NULL; int partition_set = 0; @@ -242,73 +322,96 @@ extern int sacctmgr_add_user(int argc, char *argv[]) for (i=0; i<argc; i++) 
{ int end = parse_option_end(argv[i]); if(!end) { - addto_char_list(assoc_cond->user_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(assoc_cond->acct_list, + slurm_addto_char_list(assoc_cond->user_list, + argv[i]+end); + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + slurm_addto_char_list(assoc_cond->acct_list, argv[i]+end); - } else if (strncasecmp (argv[i], "AdminLevel", 2) == 0) { + } else if (!strncasecmp (argv[i], "AdminLevel", 2)) { admin_level = str_2_acct_admin_level(argv[i]+end); - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(assoc_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(assoc_cond->cluster_list, argv[i]+end); - } else if (strncasecmp (argv[i], "DefaultAccount", 1) == 0) { + } else if (!strncasecmp (argv[i], "DefaultAccount", 1)) { default_acct = strip_quotes(argv[i]+end, NULL); - addto_char_list(assoc_cond->acct_list, + slurm_addto_char_list(assoc_cond->acct_list, default_acct); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &fairshare, "FairShare") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { if (get_uint(argv[i]+end, &max_cpu_secs_per_job, "MaxCPUSecs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &max_jobs, "MaxJobs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if 
(mins != NO_VAL) { max_wall_duration_per_job = (uint32_t) mins; limit_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(assoc_cond->user_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Partitions", 1) == 0) { - addto_char_list(assoc_cond->partition_list, + } else if (!strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(assoc_cond->user_list, + argv[i]+end); + } else if (!strncasecmp (argv[i], "Partitions", 1)) { + slurm_addto_char_list(assoc_cond->partition_list, argv[i]+end); - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!add_qos_list) { + add_qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + addto_qos_char_list(add_qos_list, qos_list, + argv[i]+end, option); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } - if(!list_count(assoc_cond->user_list)) { + if(exit_code) { destroy_acct_association_cond(assoc_cond); - printf(" Need name of user to add.\n"); + return SLURM_ERROR; + } else if(!list_count(assoc_cond->user_list)) { + destroy_acct_association_cond(assoc_cond); + exit_code=1; + fprintf(stderr, " Need name of user to add.\n"); return SLURM_ERROR; } else { acct_user_cond_t user_cond; memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.user_list = assoc_cond->user_list; - + user_cond.assoc_cond = assoc_cond; + local_user_list = acct_storage_g_get_users( db_conn, &user_cond); } + if(!local_user_list) { - printf(" Problem getting users from database. " - "Contact your admin.\n"); + exit_code=1; + fprintf(stderr, " Problem getting users from database. 
" + "Contact your admin.\n"); destroy_acct_association_cond(assoc_cond); return SLURM_ERROR; } @@ -316,13 +419,14 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(!list_count(assoc_cond->acct_list)) { destroy_acct_association_cond(assoc_cond); - printf(" Need name of acct to add user to.\n"); + exit_code=1; + fprintf(stderr, " Need name of acct to add user to.\n"); return SLURM_ERROR; } else { acct_account_cond_t account_cond; memset(&account_cond, 0, sizeof(acct_account_cond_t)); - account_cond.acct_list = assoc_cond->acct_list; + account_cond.assoc_cond = assoc_cond; local_acct_list = acct_storage_g_get_accounts( db_conn, &account_cond); @@ -330,7 +434,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } if(!local_acct_list) { - printf(" Problem getting accounts from database. " + exit_code=1; + fprintf(stderr, " Problem getting accounts from database. " "Contact your admin.\n"); list_destroy(local_user_list); destroy_acct_association_cond(assoc_cond); @@ -344,7 +449,9 @@ extern int sacctmgr_add_user(int argc, char *argv[]) cluster_list = acct_storage_g_get_clusters(db_conn, NULL); if(!cluster_list) { - printf(" Problem getting clusters from database. " + exit_code=1; + fprintf(stderr, + " Problem getting clusters from database. 
" "Contact your admin.\n"); destroy_acct_association_cond(assoc_cond); list_destroy(local_user_list); @@ -360,8 +467,10 @@ extern int sacctmgr_add_user(int argc, char *argv[]) list_iterator_destroy(itr_c); if(!list_count(assoc_cond->cluster_list)) { - printf(" Can't add users, no cluster defined yet.\n" - " Please contact your administrator.\n"); + exit_code=1; + fprintf(stderr, + " Can't add users, no cluster defined yet.\n" + " Please contact your administrator.\n"); destroy_acct_association_cond(assoc_cond); list_destroy(local_user_list); list_destroy(local_acct_list); @@ -391,7 +500,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) user = NULL; if(!sacctmgr_find_user_from_list(local_user_list, name)) { if(!default_acct) { - printf(" Need a default account for " + exit_code=1; + fprintf(stderr, " Need a default account for " "these users to add.\n"); rc = SLURM_ERROR; goto no_default; @@ -399,10 +509,11 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(first) { if(!sacctmgr_find_account_from_list( local_acct_list, default_acct)) { - printf(" error: This account '%s' " - "doesn't exist.\n" - " Contact your admin " - "to add this account.\n", + exit_code=1; + fprintf(stderr, " This account '%s' " + "doesn't exist.\n" + " Contact your admin " + "to add this account.\n", default_acct); continue; } @@ -412,7 +523,20 @@ extern int sacctmgr_add_user(int argc, char *argv[]) user->assoc_list = list_create(NULL); user->name = xstrdup(name); user->default_acct = xstrdup(default_acct); - user->qos = qos; + + if(add_qos_list && list_count(add_qos_list)) { + char *tmp_qos = NULL; + ListIterator qos_itr = + list_iterator_create(add_qos_list); + user->qos_list = + list_create(slurm_destroy_char); + while((tmp_qos = list_next(qos_itr))) { + list_append(user->qos_list, + xstrdup(tmp_qos)); + } + list_iterator_destroy(qos_itr); + } + user->admin_level = admin_level; xstrfmtcat(user_str, " %s\n", name); @@ -424,10 +548,11 @@ extern int sacctmgr_add_user(int argc, 
char *argv[]) if(acct_first) { if(!sacctmgr_find_account_from_list( local_acct_list, default_acct)) { - printf(" error: This account '%s' " - "doesn't exist.\n" - " Contact your admin " - "to add this account.\n", + exit_code=1; + fprintf(stderr, " This account '%s' " + "doesn't exist.\n" + " Contact your admin " + "to add this account.\n", account); continue; } @@ -437,15 +562,17 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(!sacctmgr_find_account_base_assoc_from_list( local_assoc_list, account, cluster)) { - if(acct_first) - printf(" error: This " - "account '%s' " - "doesn't exist on " - "cluster %s\n" - " Contact your " - "admin " - "to add this account.\n", - account, cluster); + if(acct_first) { + exit_code=1; + fprintf(stderr, " This " + "account '%s' " + "doesn't exist on " + "cluster %s\n" + " Contact your " + "admin to add " + "this account.\n", + account, cluster); + } continue; } @@ -533,7 +660,8 @@ no_default: printf(" Nothing new added.\n"); goto end_it; } else if(!assoc_str) { - printf(" Error: no associations created.\n"); + exit_code=1; + fprintf(stderr, " No associations created.\n"); goto end_it; } @@ -541,8 +669,14 @@ no_default: printf(" Adding User(s)\n%s", user_str); printf(" Settings =\n"); printf(" Default Account = %s\n", default_acct); - if(qos != ACCT_QOS_NOTSET) - printf(" Qos = %s\n", acct_qos_str(qos)); + if(add_qos_list) { + char *temp_char = get_qos_complete_str( + qos_list, add_qos_list); + if(temp_char) { + printf(" Qos = %s\n", temp_char); + xfree(temp_char); + } + } if(admin_level != ACCT_ADMIN_NOTSET) printf(" Admin Level = %s\n", @@ -598,7 +732,8 @@ no_default: rc = acct_storage_g_add_associations(db_conn, my_uid, assoc_list); } else { - printf(" error: Problem adding users\n"); + exit_code=1; + fprintf(stderr, " Problem adding users\n"); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; @@ -613,11 +748,14 @@ no_default: acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: Problem adding user 
associations\n"); + exit_code=1; + fprintf(stderr, " Problem adding user associations\n"); rc = SLURM_ERROR; } end_it: + if(add_qos_list) + list_destroy(add_qos_list); list_destroy(user_list); list_destroy(assoc_list); xfree(default_acct); @@ -636,32 +774,22 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) char *acct_str = NULL; ListIterator itr = NULL; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->fairshare = NO_VAL; - user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - user_cond->assoc_cond->max_jobs = NO_VAL; - user_cond->assoc_cond->max_nodes_per_job = NO_VAL; - user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; - for (i=0; i<argc; i++) { cond_set = _set_cond(&i, argc, argv, user_cond, NULL); } - if(!cond_set) { - printf(" You need to specify a user list " - "and account list here.\n"); + if(exit_code) { + destroy_acct_user_cond(user_cond); + return SLURM_ERROR; + } else if(!cond_set) { + exit_code=1; + fprintf(stderr, " You need to specify conditions to " + "to add the coordinator.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } - itr = list_iterator_create(user_cond->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((name = list_next(itr))) { xstrfmtcat(user_str, " %s\n", name); @@ -669,8 +797,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) list_iterator_destroy(itr); if(!user_str) { - printf(" You need to specify a user list " - "and account list here.\n"); + exit_code=1; + fprintf(stderr, " You need to specify a user list here.\n"); 
destroy_acct_user_cond(user_cond); return SLURM_ERROR; } @@ -681,8 +809,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) } list_iterator_destroy(itr); if(!acct_str) { - printf(" You need to specify a user list " - "and account list here.\n"); + exit_code=1; + fprintf(stderr, " You need to specify a account list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } @@ -705,7 +833,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: Problem adding coordinator\n"); + exit_code=1; + fprintf(stderr, " Problem adding coordinator\n"); rc = SLURM_ERROR; } @@ -723,6 +852,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) acct_user_rec_t *user = NULL; acct_association_rec_t *assoc = NULL; char *object; + List qos_list = NULL; print_field_t *field = NULL; @@ -742,7 +872,6 @@ extern int sacctmgr_list_user(int argc, char *argv[]) PRINT_MAXN, PRINT_MAXW, PRINT_QOS, - PRINT_QOS_GOLD, PRINT_QOS_RAW, PRINT_PID, PRINT_PNAME, @@ -750,34 +879,23 @@ extern int sacctmgr_list_user(int argc, char *argv[]) PRINT_USER }; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); user_cond->with_assocs = with_assoc_flag; - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, user_cond, format_list); + if(exit_code) { + destroy_acct_user_cond(user_cond); + list_destroy(format_list); + return SLURM_ERROR; + } + if(!list_count(format_list)) { - addto_char_list(format_list, "U,D,Q,Ad"); + slurm_addto_char_list(format_list, "U,D,Q,Ad"); if(user_cond->with_assocs) - addto_char_list(format_list, + 
slurm_addto_char_list(format_list, "Cl,Ac,Part,F,MaxC,MaxJ,MaxN,MaxW"); if(user_cond->with_coords) - addto_char_list(format_list, "Coord"); - } - - user_list = acct_storage_g_get_users(db_conn, user_cond); - destroy_acct_user_cond(user_cond); - - if(!user_list) { - printf(" Problem with query.\n"); - list_destroy(format_list); - return SLURM_ERROR; + slurm_addto_char_list(format_list, "Coord"); } print_fields_list = list_create(destroy_print_field); @@ -840,21 +958,16 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->name = xstrdup("MaxWall"); field->len = 11; field->print_routine = print_fields_time; - } else if(!strncasecmp("QOSGOLD", object, 4)) { - field->type = PRINT_QOS_GOLD; - field->name = xstrdup("QOS_GOLD"); - field->len = 7; - field->print_routine = print_fields_uint; } else if(!strncasecmp("QOSRAW", object, 4)) { field->type = PRINT_QOS_RAW; field->name = xstrdup("QOS_RAW"); - field->len = 7; - field->print_routine = print_fields_uint; + field->len = 10; + field->print_routine = print_fields_char_list; } else if(!strncasecmp("QOS", object, 1)) { field->type = PRINT_QOS; field->name = xstrdup("QOS"); - field->len = 9; - field->print_routine = print_fields_str; + field->len = 20; + field->print_routine = sacctmgr_print_qos_list; } else if(!strncasecmp("ParentID", object, 7)) { field->type = PRINT_PID; field->name = xstrdup("Par ID"); @@ -871,7 +984,8 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -880,6 +994,22 @@ extern int sacctmgr_list_user(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_user_cond(user_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + user_list = acct_storage_g_get_users(db_conn, user_cond); + destroy_acct_user_cond(user_cond); 
+ + if(!user_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + itr = list_iterator_create(user_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -894,13 +1024,11 @@ extern int sacctmgr_list_user(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->acct); break; case PRINT_ADMIN: field->print_routine( - SLURM_PRINT_VALUE, field, acct_admin_level_str( user-> @@ -908,101 +1036,93 @@ extern int sacctmgr_list_user(int argc, char *argv[]) break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->cluster); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, user->coord_accts); break; case PRINT_DACCT: field->print_routine( - SLURM_PRINT_VALUE, field, user->default_acct); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->fairshare); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->id); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_cpu_secs_per_job); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->max_jobs); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_wall_duration_per_job); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, - field, - acct_qos_str( - user->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos-1); + qos_list, + user->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - 
user->qos); + qos_list, + user->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_id); break; case PRINT_PNAME: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_acct); break; case PRINT_PART: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->partition); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, user->name); break; @@ -1019,89 +1139,95 @@ extern int sacctmgr_list_user(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_ADMIN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, acct_admin_level_str( user->admin_level)); break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, user->coord_accts); break; case PRINT_DACCT: field->print_routine( - SLURM_PRINT_VALUE, field, + field, user->default_acct); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str(user->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos-1); + field, qos_list, + user->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } 
field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos); + field, qos_list, + user->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_PART: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, + field, user->name); break; default: @@ -1131,20 +1257,6 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) int cond_set = 0, rec_set = 0, set = 0; List ret_list = NULL; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->fairshare = NO_VAL; - user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - user_cond->assoc_cond->max_jobs = NO_VAL; - user_cond->assoc_cond->max_nodes_per_job = NO_VAL; - user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; - assoc->fairshare = NO_VAL; assoc->max_cpu_secs_per_job = NO_VAL; assoc->max_jobs = NO_VAL; @@ -1152,11 +1264,11 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) assoc->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i++; cond_set = _set_cond(&i, argc, argv, user_cond, NULL); - } else if (strncasecmp (argv[i], "Set", 3) == 0) { + } else if (!strncasecmp (argv[i], "Set", 3)) { i++; rec_set = _set_rec(&i, argc, argv, user, assoc); } else { @@ -1164,8 +1276,14 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) } } - if(!rec_set) { - printf(" You didn't give me anything to set\n"); + if(exit_code) { + 
destroy_acct_user_cond(user_cond); + destroy_acct_user_rec(user); + destroy_acct_association_rec(assoc); + return SLURM_ERROR; + } else if(!rec_set) { + exit_code=1; + fprintf(stderr, " You didn't give me anything to set\n"); destroy_acct_user_cond(user_cond); destroy_acct_user_rec(user); destroy_acct_association_rec(assoc); @@ -1197,8 +1315,10 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) list_transfer(user_cond->def_acct_list, user_cond->assoc_cond->acct_list); } else { - printf(" There was a problem with your " - "'where' options.\n"); + exit_code=1; + fprintf(stderr, + " There was a problem with your " + "'where' options.\n"); goto assoc_start; } } @@ -1216,7 +1336,8 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1241,7 +1362,8 @@ assoc_start: } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1274,21 +1396,19 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) List ret_list = NULL; int set = 0; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - if(!(set = _set_cond(&i, argc, argv, user_cond, NULL))) { - printf(" No conditions given to remove, not executing.\n"); + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } 
+ if(exit_code) { + destroy_acct_user_cond(user_cond); + return SLURM_ERROR; + } + notice_thread_init(); if(set == 1) { ret_list = acct_storage_g_remove_users( @@ -1322,7 +1442,8 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing deleted\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1345,32 +1466,23 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) ListIterator itr = NULL; List ret_list = NULL; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->fairshare = NO_VAL; - user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - user_cond->assoc_cond->max_jobs = NO_VAL; - user_cond->assoc_cond->max_nodes_per_job = NO_VAL; - user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { cond_set = _set_cond(&i, argc, argv, user_cond, NULL); } - if(!cond_set) { - printf(" You need to specify a user list " + if(exit_code) { + destroy_acct_user_cond(user_cond); + return SLURM_ERROR; + } else if(!cond_set) { + exit_code=1; + fprintf(stderr, " You need to specify a user list " "or account list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } - itr = list_iterator_create(user_cond->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((name = list_next(itr))) { xstrfmtcat(user_str, " %s\n", name); @@ -1384,7 +1496,8 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) } list_iterator_destroy(itr); if(!user_str && 
!acct_str) { - printf(" You need to specify a user list " + exit_code=1; + fprintf(stderr, " You need to specify a user list " "or an account list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; @@ -1421,7 +1534,8 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing removed\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } diff --git a/src/salloc/Makefile.am b/src/salloc/Makefile.am index a27f098f6..e2da3019f 100644 --- a/src/salloc/Makefile.am +++ b/src/salloc/Makefile.am @@ -1,6 +1,7 @@ # AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -8,7 +9,7 @@ bin_PROGRAMS = salloc salloc_SOURCES = salloc.c salloc.h opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl salloc_LDADD = \ $(convenience_libs) diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in index 1c34e396f..91ae04a40 100644 --- a/src/salloc/Makefile.in +++ b/src/salloc/Makefile.in @@ -72,7 +72,8 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_salloc_OBJECTS = salloc.$(OBJEXT) opt.$(OBJEXT) salloc_OBJECTS = $(am_salloc_OBJECTS) -salloc_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +salloc_DEPENDENCIES = $(am__DEPENDENCIES_1) salloc_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(salloc_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -261,9 +262,10 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) salloc_SOURCES = salloc.c salloc.h opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl salloc_LDADD = \ $(convenience_libs) @@ -466,6 +468,7 @@ 
install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9e17c746b..a46937536 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -125,7 +125,7 @@ #define LONG_OPT_NTASKSPERNODE 0x136 #define LONG_OPT_NTASKSPERSOCKET 0x137 #define LONG_OPT_NTASKSPERCORE 0x138 -#define LONG_OPT_TASK_MEM 0x13a +#define LONG_OPT_MEM_PER_CPU 0x13a #define LONG_OPT_HINT 0x13b #define LONG_OPT_ACCTG_FREQ 0x13c @@ -267,7 +267,7 @@ static void _opt_default() opt.minsockets = -1; opt.mincores = -1; opt.minthreads = -1; - opt.task_mem = -1; + opt.mem_per_cpu = -1; opt.realmem = -1; opt.tmpdisk = -1; @@ -512,8 +512,9 @@ void set_options(const int argc, char **argv) {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"job-mem", required_argument, 0, LONG_OPT_TASK_MEM}, - {"task-mem", required_argument, 0, LONG_OPT_TASK_MEM}, + {"job-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"task-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"hint", required_argument, 0, LONG_OPT_HINT}, {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, @@ -761,9 +762,9 @@ void set_options(const int argc, char **argv) exit(1); } break; - case LONG_OPT_TASK_MEM: - opt.task_mem = (int) str_to_bytes(optarg); - if (opt.task_mem < 0) { + case LONG_OPT_MEM_PER_CPU: + opt.mem_per_cpu = (int) str_to_bytes(optarg); + if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); exit(1); @@ -1015,15 +1016,11 @@ static bool _opt_verify(void) verified = false; } - /* When CR with memory as a CR is enabled we need to assign - * adequate value or check the value to opt.mem 
*/ - if ((opt.realmem >= -1) && (opt.task_mem > 0)) { - if (opt.realmem == -1) { - opt.realmem = opt.task_mem; - } else if (opt.realmem < opt.task_mem) { - info("mem < task-mem - resizing mem to be equal " - "to task-mem"); - opt.realmem = opt.task_mem; + if ((opt.realmem > -1) && (opt.mem_per_cpu > -1)) { + if (opt.realmem < opt.mem_per_cpu) { + info("mem < mem-per-cpu - resizing mem to be equal " + "to mem-per-cpu"); + opt.realmem = opt.mem_per_cpu; } } @@ -1173,8 +1170,8 @@ static char *print_constraints() if (opt.realmem > 0) xstrfmtcat(buf, "mem=%dM ", opt.realmem); - if (opt.task_mem > 0) - xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem); + if (opt.mem_per_cpu > 0) + xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); if (opt.tmpdisk > 0) xstrfmtcat(buf, "tmp=%ld ", opt.tmpdisk); @@ -1353,7 +1350,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--bell] [--no-bell] [--kill-command[=signal]]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" -" [--network=type]\n" +" [--network=type] [--mem-per-cpu=MB]\n" " executable [args...]\n"); } @@ -1416,8 +1413,8 @@ static void _help(void) "Consumable resources related options:\n" " --exclusive allocate nodes in exclusive mode when\n" " cpu consumable resource is enabled\n" -" --task-mem=MB maximum amount of real memory per task\n" -" required by the job.\n" +" --mem-per-cpu=MB maximum amount of real memory per allocated\n" +" cpu required by the job.\n" " --mem >= --job-mem if --mem is specified.\n" "\n" "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 972444517..2ca869cc7 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -107,7 +107,7 @@ typedef struct salloc_options { int minsockets; /* --minsockets=n */ int mincores; /* --mincores=n */ int minthreads; /* --minthreads=n */ - int task_mem; /* --task-mem=n */ + int mem_per_cpu; /* --mem_per_cpu=n */ int realmem; /* --mem=n */ 
long tmpdisk; /* --tmp=n */ char *constraints; /* --constraints=, -C constraint*/ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index edb65cb94..79c5616b3 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -217,10 +217,6 @@ int main(int argc, char *argv[]) env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d", opt.acctg_freq); } - if (opt.task_mem >= 0) { - env_array_append_fmt(&env, "SLURM_TASK_MEM", "%d", - opt.task_mem); - } if (opt.network) env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network); env_array_set_environment(env); @@ -360,6 +356,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->job_min_threads = opt.minthreads; if (opt.realmem > -1) desc->job_min_memory = opt.realmem; + else if (opt.mem_per_cpu > -1) + desc->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU; if (opt.tmpdisk > -1) desc->job_min_tmp_disk = opt.tmpdisk; if (opt.overcommit) { diff --git a/src/sattach/Makefile.am b/src/sattach/Makefile.am index e38c68075..749a846d1 100644 --- a/src/sattach/Makefile.am +++ b/src/sattach/Makefile.am @@ -12,7 +12,7 @@ sattach_SOURCES = \ sattach.c \ sattach.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sattach_LDADD = \ $(convenience_libs) diff --git a/src/sattach/Makefile.in b/src/sattach/Makefile.in index 45cc34b83..14d5c2fd7 100644 --- a/src/sattach/Makefile.in +++ b/src/sattach/Makefile.in @@ -73,7 +73,8 @@ PROGRAMS = $(bin_PROGRAMS) am_sattach_OBJECTS = attach.$(OBJEXT) opt.$(OBJEXT) sattach.$(OBJEXT) \ sattach.wrapper.$(OBJEXT) sattach_OBJECTS = $(am_sattach_OBJECTS) -sattach_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +sattach_DEPENDENCIES = $(am__DEPENDENCIES_1) sattach_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sattach_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -269,7 +270,7 @@ sattach_SOURCES = \ sattach.c \ 
sattach.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sattach_LDADD = \ $(convenience_libs) diff --git a/src/sbatch/Makefile.am b/src/sbatch/Makefile.am index de4ffb6aa..d9b382f1b 100644 --- a/src/sbatch/Makefile.am +++ b/src/sbatch/Makefile.am @@ -1,6 +1,7 @@ # AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -8,7 +9,7 @@ bin_PROGRAMS = sbatch sbatch_SOURCES = sbatch.c opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sbatch_LDADD = $(convenience_libs) diff --git a/src/sbatch/Makefile.in b/src/sbatch/Makefile.in index 7cd63bdf9..edd178e68 100644 --- a/src/sbatch/Makefile.in +++ b/src/sbatch/Makefile.in @@ -72,7 +72,8 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_sbatch_OBJECTS = sbatch.$(OBJEXT) opt.$(OBJEXT) sbatch_OBJECTS = $(am_sbatch_OBJECTS) -sbatch_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +sbatch_DEPENDENCIES = $(am__DEPENDENCIES_1) sbatch_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sbatch_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -261,9 +262,10 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) sbatch_SOURCES = sbatch.c opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sbatch_LDADD = $(convenience_libs) sbatch_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) all: all-am @@ -464,6 +466,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 365782b68..263b70c20 
100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -122,7 +122,7 @@ #define LONG_OPT_NTASKSPERNODE 0x136 #define LONG_OPT_NTASKSPERSOCKET 0x137 #define LONG_OPT_NTASKSPERCORE 0x138 -#define LONG_OPT_TASK_MEM 0x13a +#define LONG_OPT_MEM_PER_CPU 0x13a #define LONG_OPT_HINT 0x13b #define LONG_OPT_BLRTS_IMAGE 0x140 #define LONG_OPT_LINUX_IMAGE 0x141 @@ -269,7 +269,7 @@ static void _opt_default() opt.minsockets = -1; opt.mincores = -1; opt.minthreads = -1; - opt.task_mem = -1; + opt.mem_per_cpu = -1; opt.realmem = -1; opt.tmpdisk = -1; @@ -523,8 +523,9 @@ static struct option long_options[] = { {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"job-mem", required_argument, 0, LONG_OPT_TASK_MEM}, - {"task-mem", required_argument, 0, LONG_OPT_TASK_MEM}, + {"job-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"task-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"hint", required_argument, 0, LONG_OPT_HINT}, {"tmp", required_argument, 0, LONG_OPT_TMP}, {"jobid", required_argument, 0, LONG_OPT_JOBID}, @@ -1150,14 +1151,13 @@ static void _set_options(int argc, char **argv) exit(1); } break; - case LONG_OPT_TASK_MEM: - opt.task_mem = (int) str_to_bytes(optarg); - if (opt.task_mem < 0) { + case LONG_OPT_MEM_PER_CPU: + opt.mem_per_cpu = (int) str_to_bytes(optarg); + if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); exit(1); } - setenvf(NULL, "SLURM_TASK_MEM", "%d", opt.task_mem); break; case LONG_OPT_TMP: opt.tmpdisk = str_to_bytes(optarg); @@ -1773,15 +1773,11 @@ static bool _opt_verify(void) verified = false; } - /* When CR with memory as a CR is enabled we need to assign - * adequate value or check the value to opt.mem */ - if ((opt.realmem >= -1) && (opt.task_mem > 0)) { - if (opt.realmem == -1) { - opt.realmem = opt.task_mem; - } else if (opt.realmem < 
opt.task_mem) { - info("mem < task-mem - resizing mem to be equal " - "to task-mem"); - opt.realmem = opt.task_mem; + if ((opt.realmem > -1) && (opt.mem_per_cpu > -1)) { + if (opt.realmem < opt.mem_per_cpu) { + info("mem < mem-per-cpu - resizing mem to be equal " + "to mem-per-cpu"); + opt.realmem = opt.mem_per_cpu; } } @@ -1979,8 +1975,8 @@ static char *print_constraints() if (opt.realmem > 0) xstrfmtcat(buf, "mem=%dM ", opt.realmem); - if (opt.task_mem > 0) - xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem); + if (opt.mem_per_cpu > 0) + xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); if (opt.tmpdisk > 0) xstrfmtcat(buf, "tmp=%ld ", opt.tmpdisk); @@ -2154,7 +2150,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--requeue] [--no-requeue] [--ntasks-per-node=n] [--propagate]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" -" [--network=type]\n" +" [--network=type] [--mem-per-cpu=MB]\n" " executable [args...]\n"); } @@ -2219,9 +2215,8 @@ static void _help(void) "Consumable resources related options:\n" " --exclusive allocate nodes in exclusive mode when\n" " cpu consumable resource is enabled\n" -" --task-mem=MB maximum amount of real memory per task\n" -" required by the job.\n" -" --mem >= --job-mem if --mem is specified.\n" +" --mem-per-cpu=MB maximum amount of real memory per CPU\n" +" allocated to the job.\n" "\n" "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" " -B --extra-node-info=S[:C[:T]] Expands to:\n" diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index b746cedf2..fe53e95a1 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing - * $Id: opt.h 14338 2008-06-24 23:10:32Z jette $ + * $Id: opt.h 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 
2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -113,7 +113,7 @@ typedef struct sbatch_options { int minsockets; /* --minsockets=n */ int mincores; /* --mincores=n */ int minthreads; /* --minthreads=n */ - int task_mem; /* --task-mem=n */ + int mem_per_cpu; /* --mem-per-cpu=n */ int realmem; /* --mem=n */ long tmpdisk; /* --tmp=n */ char *constraints; /* --constraints=, -C constraint*/ diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index b94dc5542..5cda2761f 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * sbatch.c - Submit a SLURM batch script. * - * $Id: sbatch.c 14420 2008-07-02 19:52:49Z jette $ + * $Id: sbatch.c 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2006-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -227,6 +227,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->job_min_threads = opt.minthreads; if (opt.realmem > -1) desc->job_min_memory = opt.realmem; + else if (opt.mem_per_cpu > -1) + desc->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU; if (opt.tmpdisk > -1) desc->job_min_tmp_disk = opt.tmpdisk; if (opt.overcommit) { diff --git a/src/sbcast/Makefile.am b/src/sbcast/Makefile.am index ce502c724..bedbd116a 100644 --- a/src/sbcast/Makefile.am +++ b/src/sbcast/Makefile.am @@ -6,7 +6,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = sbcast -sbcast_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sbcast_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sbcast.h sbcast_SOURCES = agent.c sbcast.c opts.c diff --git a/src/sbcast/Makefile.in b/src/sbcast/Makefile.in index 2dc5fa5ce..a415e04a0 100644 --- a/src/sbcast/Makefile.in +++ b/src/sbcast/Makefile.in @@ -75,7 +75,7 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_sbcast_OBJECTS = agent.$(OBJEXT) sbcast.$(OBJEXT) opts.$(OBJEXT) sbcast_OBJECTS = $(am_sbcast_OBJECTS) -sbcast_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +sbcast_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sbcast_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sbcast_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -266,7 +266,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) -sbcast_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sbcast_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sbcast.h sbcast_SOURCES = agent.c sbcast.c opts.c sbcast_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/scancel/Makefile.am b/src/scancel/Makefile.am index 135841c4a..95cfb3e61 100644 --- a/src/scancel/Makefile.am +++ b/src/scancel/Makefile.am @@ -6,7 +6,7 @@ INCLUDES = -I$(top_srcdir) bin_PROGRAMS = 
scancel -scancel_LDADD = $(top_builddir)/src/api/libslurmhelper.la +scancel_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = scancel.h scancel_SOURCES = scancel.c opt.c diff --git a/src/scancel/Makefile.in b/src/scancel/Makefile.in index 66ffc8f21..635087993 100644 --- a/src/scancel/Makefile.in +++ b/src/scancel/Makefile.in @@ -74,7 +74,7 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_scancel_OBJECTS = scancel.$(OBJEXT) opt.$(OBJEXT) scancel_OBJECTS = $(am_scancel_OBJECTS) -scancel_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +scancel_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o scancel_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(scancel_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -265,7 +265,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) -scancel_LDADD = $(top_builddir)/src/api/libslurmhelper.la +scancel_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = scancel.h scancel_SOURCES = scancel.c opt.c scancel_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/scontrol/Makefile.am b/src/scontrol/Makefile.am index c450c3ded..31b51399c 100644 --- a/src/scontrol/Makefile.am +++ b/src/scontrol/Makefile.am @@ -16,7 +16,8 @@ scontrol_SOURCES = \ update_node.c \ update_part.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl + scontrol_LDADD = \ $(convenience_libs) \ diff --git a/src/scontrol/Makefile.in b/src/scontrol/Makefile.in index a872200ea..f546570c6 100644 --- a/src/scontrol/Makefile.in +++ b/src/scontrol/Makefile.in @@ -74,8 +74,9 @@ am_scontrol_OBJECTS = info_job.$(OBJEXT) info_node.$(OBJEXT) \ info_part.$(OBJEXT) scontrol.$(OBJEXT) update_job.$(OBJEXT) \ update_node.$(OBJEXT) update_part.$(OBJEXT) scontrol_OBJECTS = $(am_scontrol_OBJECTS) -am__DEPENDENCIES_1 = -scontrol_DEPENDENCIES = 
$(convenience_libs) $(am__DEPENDENCIES_1) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +am__DEPENDENCIES_2 = +scontrol_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) scontrol_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(scontrol_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -275,7 +276,7 @@ scontrol_SOURCES = \ update_node.c \ update_part.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl scontrol_LDADD = \ $(convenience_libs) \ $(READLINE_LIBS) diff --git a/src/scontrol/info_job.c b/src/scontrol/info_job.c index 488ae8b68..cb8b944ff 100644 --- a/src/scontrol/info_job.c +++ b/src/scontrol/info_job.c @@ -2,6 +2,7 @@ * info_job.c - job information functions for scontrol. ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * LLNL-CODE-402394. @@ -42,7 +43,8 @@ #include "src/common/stepd_api.h" static bool _in_node_bit_list(int inx, int *node_list_array); - +static int _scontrol_load_jobs(job_info_msg_t ** job_buffer_pptr, + uint32_t job_id); /* * Determine if a node index is in a node list pair array. 
* RET - true if specified index is in the array @@ -67,8 +69,8 @@ _in_node_bit_list(int inx, int *node_list_array) } /* Load current job table information into *job_buffer_pptr */ -extern int -scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr) +static int +_scontrol_load_jobs(job_info_msg_t ** job_buffer_pptr, uint32_t job_id) { int error_code; static job_info_msg_t *old_job_info_ptr = NULL; @@ -82,8 +84,13 @@ scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr) if (old_job_info_ptr) { if (last_show_flags != show_flags) old_job_info_ptr->last_update = (time_t) 0; - error_code = slurm_load_jobs (old_job_info_ptr->last_update, + if (job_id) { + error_code = slurm_load_job(&job_info_ptr, job_id); + } else { + error_code = slurm_load_jobs( + old_job_info_ptr->last_update, &job_info_ptr, show_flags); + } if (error_code == SLURM_SUCCESS) slurm_free_job_info_msg (old_job_info_ptr); else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) { @@ -92,13 +99,17 @@ scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr) if (quiet_flag == -1) printf ("slurm_load_jobs no change in data\n"); } + } else if (job_id) { + error_code = slurm_load_job(&job_info_ptr, job_id); + } else { + error_code = slurm_load_jobs((time_t) NULL, &job_info_ptr, + show_flags); } - else - error_code = slurm_load_jobs ((time_t) NULL, &job_info_ptr, - show_flags); if (error_code == SLURM_SUCCESS) { old_job_info_ptr = job_info_ptr; + if (job_id) + old_job_info_ptr->last_update = (time_t) 0; last_show_flags = show_flags; *job_buffer_pptr = job_info_ptr; } @@ -154,7 +165,7 @@ scontrol_print_completing (void) node_info_msg_t *node_info_msg; uint16_t show_flags = 0; - error_code = scontrol_load_jobs (&job_info_msg); + error_code = _scontrol_load_jobs (&job_info_msg, 0); if (error_code) { exit_code = 1; if (quiet_flag != 1) @@ -234,7 +245,10 @@ scontrol_print_job (char * job_id_str) job_info_msg_t * job_buffer_ptr = NULL; job_info_t *job_ptr = NULL; - error_code = scontrol_load_jobs(&job_buffer_ptr); + if 
(job_id_str) + job_id = (uint32_t) strtol (job_id_str, (char **)NULL, 10); + + error_code = _scontrol_load_jobs(&job_buffer_ptr, job_id); if (error_code) { exit_code = 1; if (quiet_flag != 1) @@ -250,17 +264,12 @@ scontrol_print_job (char * job_id_str) time_str, job_buffer_ptr->record_count); } - if (job_id_str) - job_id = (uint32_t) strtol (job_id_str, (char **)NULL, 10); + job_ptr = job_buffer_ptr->job_array ; for (i = 0; i < job_buffer_ptr->record_count; i++) { - if (job_id_str && job_id != job_ptr[i].job_id) - continue; print_cnt++; slurm_print_job_info (stdout, & job_ptr[i], one_liner ) ; - if (job_id_str) - break; } if (print_cnt == 0) { diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index 49ae2df8a..b981d761d 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -324,12 +324,19 @@ scontrol_update_job (int argc, char *argv[]) (char **) NULL, 10); update_cnt++; } - else if (strncasecmp(argv[i], "MinMemory=", 10) == 0) { + else if (strncasecmp(argv[i], "MinMemoryNode=", 14) == 0) { job_msg.job_min_memory = - (uint32_t) strtol(&argv[i][10], + (uint32_t) strtol(&argv[i][14], (char **) NULL, 10); update_cnt++; } + else if (strncasecmp(argv[i], "MinMemoryCPU=", 13) == 0) { + job_msg.job_min_memory = + (uint32_t) strtol(&argv[i][13], + (char **) NULL, 10); + job_msg.job_min_memory |= MEM_PER_CPU; + update_cnt++; + } else if (strncasecmp(argv[i], "MinTmpDisk=", 11) == 0) { job_msg.job_min_tmp_disk = (uint32_t) strtol(&argv[i][11], diff --git a/src/sinfo/Makefile.am b/src/sinfo/Makefile.am index 114d28cc9..0820811a3 100644 --- a/src/sinfo/Makefile.am +++ b/src/sinfo/Makefile.am @@ -6,7 +6,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = sinfo -sinfo_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sinfo_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sinfo.h print.h sinfo_SOURCES = sinfo.c opts.c print.c sort.c diff --git a/src/sinfo/Makefile.in b/src/sinfo/Makefile.in 
index 07e64aec7..75e74ad02 100644 --- a/src/sinfo/Makefile.in +++ b/src/sinfo/Makefile.in @@ -76,7 +76,7 @@ PROGRAMS = $(bin_PROGRAMS) am_sinfo_OBJECTS = sinfo.$(OBJEXT) opts.$(OBJEXT) print.$(OBJEXT) \ sort.$(OBJEXT) sinfo_OBJECTS = $(am_sinfo_OBJECTS) -sinfo_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +sinfo_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sinfo_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sinfo_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -267,7 +267,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) -sinfo_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sinfo_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sinfo.h print.h sinfo_SOURCES = sinfo.c opts.c print.c sort.c sinfo_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 1dada0c79..b9ec0a969 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2,8 +2,6 @@ * job_mgr.c - manage the job information of slurm * Note: there is a global job list (job_list), time stamp * (last_job_update), and hash table (job_hash) - * - * $Id: job_mgr.c 14311 2008-06-23 18:55:55Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -1228,7 +1226,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) long kill_on_node_fail, shared, immediate; long cpus_per_task, requeue, num_tasks, overcommit; long ntasks_per_node, ntasks_per_socket, ntasks_per_core; - char buf[100]; + char *mem_type, buf[100]; if (job_specs == NULL) return; @@ -1262,12 +1260,21 @@ void dump_job_desc(job_desc_msg_t * job_specs) debug3(" job_min_cores=%ld job_min_threads=%ld", job_min_cores, job_min_threads); - job_min_memory = (job_specs->job_min_memory != NO_VAL) ? - (long) job_specs->job_min_memory : -1L; + if (job_specs->job_min_memory == NO_VAL) { + job_min_memory = -1L; + mem_type = "job"; + } else if (job_specs->job_min_memory & MEM_PER_CPU) { + job_min_memory = (long) (job_specs->job_min_memory & + (~MEM_PER_CPU)); + mem_type = "cpu"; + } else { + job_min_memory = (long) job_specs->job_min_memory; + mem_type = "job"; + } job_min_tmp_disk = (job_specs->job_min_tmp_disk != NO_VAL) ? (long) job_specs->job_min_tmp_disk : -1L; - debug3(" job_min_memory=%ld job_min_tmp_disk=%ld", - job_min_memory, job_min_tmp_disk); + debug3(" min_memory_%s=%ld job_min_tmp_disk=%ld", + mem_type, job_min_memory, job_min_tmp_disk); immediate = (job_specs->immediate == 0) ? 
0L : 1L; debug3(" immediate=%ld features=%s", immediate, job_specs->features); @@ -1677,6 +1684,7 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, job_ptr->job_state = JOB_CANCELLED; job_ptr->start_time = now; job_ptr->end_time = now; + srun_allocate_abort(job_ptr); job_completion_logger(job_ptr); delete_job_details(job_ptr); verbose("job_signal of pending job %u successful", job_id); @@ -1994,8 +2002,9 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, accounting_enforce, &assoc_ptr)) { - info("_job_create: invalid account or partition for user %u", - job_desc->user_id); + info("_job_create: invalid account or partition for user %u, " + "account '%s', and partition '%s'", + job_desc->user_id, assoc_rec.acct, assoc_rec.partition); error_code = ESLURM_INVALID_ACCOUNT; return error_code; } @@ -2854,6 +2863,53 @@ static char *_copy_nodelist_no_dup(char *node_list) return xstrdup(buf); } +static bool _valid_job_min_mem(job_desc_msg_t * job_desc_msg) +{ + uint32_t base_size = job_desc_msg->job_min_memory; + uint32_t size_limit = slurmctld_conf.max_mem_per_task; + uint16_t cpus_per_node; + + if (size_limit == 0) + return true; + + if ((base_size & MEM_PER_CPU) && (size_limit & MEM_PER_CPU)) { + base_size &= (~MEM_PER_CPU); + size_limit &= (~MEM_PER_CPU); + if (base_size <= size_limit) + return true; + return false; + } + + if (((base_size & MEM_PER_CPU) == 0) && + ((size_limit & MEM_PER_CPU) == 0)) { + if (base_size <= size_limit) + return true; + return false; + } + + /* Our size is per CPU and limit per node or vise-versa. + * CPU count my vary by node, but we don't have a good + * way to identify specific nodes for the job at this + * point, so just pick the first node as a basis for + * enforcing MaxMemPerCPU. 
*/ + if (slurmctld_conf.fast_schedule) + cpus_per_node = node_record_table_ptr[0].config_ptr->cpus; + else + cpus_per_node = node_record_table_ptr[0].cpus; + if (job_desc_msg->num_procs != NO_VAL) + cpus_per_node = MIN(cpus_per_node, job_desc_msg->num_procs); + if (base_size & MEM_PER_CPU) { + base_size &= (~MEM_PER_CPU); + base_size *= cpus_per_node; + } else { + size_limit &= (~MEM_PER_CPU); + size_limit *= cpus_per_node; + } + if (base_size <= size_limit) + return true; + return false; +} + /* * job_time_limit - terminate jobs which have exceeded their time limit * global: job_list - pointer global job list @@ -3010,6 +3066,12 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, job_desc_msg->nice = NICE_OFFSET; } + if (job_desc_msg->job_min_memory == NO_VAL) { + /* Default memory limit is DefMemPerCPU (if set) or no limit */ + job_desc_msg->job_min_memory = slurmctld_conf.def_mem_per_task; + } else if (!_valid_job_min_mem(job_desc_msg)) + return ESLURM_INVALID_TASK_MEMORY; + if (job_desc_msg->min_sockets == (uint16_t) NO_VAL) job_desc_msg->min_sockets = 1; /* default socket count of 1 */ if (job_desc_msg->min_cores == (uint16_t) NO_VAL) @@ -3035,8 +3097,6 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, job_desc_msg->job_min_cores = 1; /* default 1 core per socket */ if (job_desc_msg->job_min_threads == (uint16_t) NO_VAL) job_desc_msg->job_min_threads = 1; /* default 1 thread per core */ - if (job_desc_msg->job_min_memory == NO_VAL) - job_desc_msg->job_min_memory = 0; /* default no memory limit */ if (job_desc_msg->job_min_tmp_disk == NO_VAL) job_desc_msg->job_min_tmp_disk = 0;/* default 0MB disk per node */ @@ -3188,7 +3248,7 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, (job_ptr->part_ptr->hidden)) continue; - if (slurmctld_conf.private_data + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) continue; @@ -3208,6 +3268,53 @@ 
extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, buffer_ptr[0] = xfer_buf_data(buffer); } +/* + * pack_one_job - dump information for one jobs in + * machine independent form (for network transmission) + * OUT buffer_ptr - the pointer is set to the allocated buffer. + * OUT buffer_size - set to size of the buffer in bytes + * IN job_id - ID of job that we want info for + * IN uid - uid of user making request (for partition filtering) + * NOTE: the buffer at *buffer_ptr must be xfreed by the caller + * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c + * whenever the data format changes + */ +extern int pack_one_job(char **buffer_ptr, int *buffer_size, + uint32_t job_id, uid_t uid) +{ + ListIterator job_iterator; + struct job_record *job_ptr; + uint32_t jobs_packed = 0; + Buf buffer; + + buffer_ptr[0] = NULL; + *buffer_size = 0; + + job_iterator = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (job_ptr->job_id != job_id) + continue; + + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) + && (job_ptr->user_id != uid) && !validate_super_user(uid)) + break; + + jobs_packed++; + break; + } + list_iterator_destroy(job_iterator); + if (jobs_packed == 0) + return ESLURM_INVALID_JOB_ID; + + buffer = init_buf(BUF_SIZE); + pack32(jobs_packed, buffer); + pack_time(time(NULL), buffer); + pack_job(job_ptr, buffer); + + *buffer_size = get_buf_offset(buffer); + buffer_ptr[0] = xfer_buf_data(buffer); + return SLURM_SUCCESS; +} /* * pack_job - dump all configuration information about a specific job in @@ -3853,12 +3960,16 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (job_specs->job_min_memory != NO_VAL) { if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) error_code = ESLURM_DISABLED; - else if (super_user - || (detail_ptr->job_min_memory - > job_specs->job_min_memory)) { + else if (super_user) { + char *entity; + if (job_specs->job_min_memory & MEM_PER_CPU) + entity 
= "cpu"; + else + entity = "job"; detail_ptr->job_min_memory = job_specs->job_min_memory; - info("update_job: setting job_min_memory to %u for " - "job_id %u", job_specs->job_min_memory, + info("update_job: setting min_memory_%s to %u for " + "job_id %u", entity, + (job_specs->job_min_memory & (~MEM_PER_CPU)), job_specs->job_id); } else { error("Attempt to increase job_min_memory for job %u", @@ -4590,7 +4701,7 @@ job_alloc_info(uint32_t uid, uint32_t job_id, struct job_record **job_pptr) if ((job_ptr->user_id != uid) && (uid != 0) && (uid != slurmctld_conf.slurm_user_id)) return ESLURM_ACCESS_DENIED; - if (slurmctld_conf.private_data + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) return ESLURM_ACCESS_DENIED; if (IS_JOB_PENDING(job_ptr)) diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 457827d24..6f0936a9c 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -458,7 +458,7 @@ extern void launch_job(struct job_record *job_ptr) launch_msg_ptr->open_mode = job_ptr->details->open_mode; launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq; - if (make_batch_job_cred(launch_msg_ptr)) { + if (make_batch_job_cred(launch_msg_ptr, job_ptr)) { error("aborting batch job %u", job_ptr->job_id); /* FIXME: This is a kludge, but this event indicates a serious * problem with OpenSSH and should never happen. 
We are @@ -524,9 +524,11 @@ _xduparray(uint16_t size, char ** array) * make_batch_job_cred - add a job credential to the batch_job_launch_msg * IN/OUT launch_msg_ptr - batch_job_launch_msg in which job_id, step_id, * uid and nodes have already been set + * IN job_ptr - pointer to job record * RET 0 or error code */ -extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr) +extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr, + struct job_record *job_ptr) { slurm_cred_arg_t cred_arg; @@ -534,6 +536,15 @@ extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr) cred_arg.stepid = launch_msg_ptr->step_id; cred_arg.uid = launch_msg_ptr->uid; cred_arg.hostlist = launch_msg_ptr->nodes; + if (job_ptr->details == NULL) + cred_arg.job_mem = 0; + else if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + cred_arg.job_mem = job_ptr->details->job_min_memory; + cred_arg.job_mem &= (~MEM_PER_CPU); + cred_arg.job_mem *= job_ptr->alloc_lps[0]; + } else + cred_arg.job_mem = job_ptr->details->job_min_memory; + cred_arg.alloc_lps_cnt = 0; cred_arg.alloc_lps = NULL; diff --git a/src/slurmctld/job_scheduler.h b/src/slurmctld/job_scheduler.h index b40310137..f09cb7e72 100644 --- a/src/slurmctld/job_scheduler.h +++ b/src/slurmctld/job_scheduler.h @@ -82,9 +82,11 @@ extern void launch_job(struct job_record *job_ptr); * make_batch_job_cred - add a job credential to the batch_job_launch_msg * IN/OUT launch_msg_ptr - batch_job_launch_msg in which job_id, step_id, * uid and nodes have already been set + * IN job_ptr - pointer to job record * RET 0 or error code */ -extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr); +extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr, + struct job_record *job_ptr); /* Print a job's dependency information based upon job_ptr->depend_list */ extern void print_job_dependency(struct job_record *job_ptr); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c 
index 0bc50f8e9..c88700050 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1196,7 +1196,8 @@ extern int job_req_node_filter(struct job_record *job_ptr, FREE_NULL_BITMAP(feature_bitmap); if (slurmctld_conf.fast_schedule) { if ((detail_ptr->job_min_procs > config_ptr->cpus ) - || (detail_ptr->job_min_memory > config_ptr->real_memory) + || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + config_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; @@ -1213,7 +1214,8 @@ extern int job_req_node_filter(struct job_record *job_ptr, } } else { if ((detail_ptr->job_min_procs > node_ptr->cpus ) - || (detail_ptr->job_min_memory > node_ptr->real_memory) + || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + node_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > node_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; @@ -1284,7 +1286,8 @@ static int _build_node_list(struct job_record *job_ptr, config_filter = 0; if ((detail_ptr->job_min_procs > config_ptr->cpus ) - || (detail_ptr->job_min_memory > config_ptr->real_memory) + || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + config_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) config_filter = 1; if (mc_ptr @@ -1391,7 +1394,8 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, node_con = node_record_table_ptr[i].config_ptr; if ((job_con->job_min_procs <= node_con->cpus) - && (job_con->job_min_memory <= node_con->real_memory) + && ((job_con->job_min_memory & (~MEM_PER_CPU)) <= + node_con->real_memory) && (job_con->job_min_tmp_disk <= node_con->tmp_disk)) job_ok = 1; if (mc_ptr @@ -1419,7 +1423,8 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, node_ptr = &node_record_table_ptr[i]; if ((job_con->job_min_procs <= node_ptr->cpus) - && (job_con->job_min_memory <= node_ptr->real_memory) + && ((job_con->job_min_memory & (~MEM_PER_CPU)) <= + node_ptr->real_memory) 
&& (job_con->job_min_tmp_disk <= node_ptr->tmp_disk)) job_ok = 1; if (mc_ptr diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index ed3d9d2f8..3451467bd 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -95,6 +95,7 @@ inline static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg); inline static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg); inline static void _slurm_rpc_dump_conf(slurm_msg_t * msg); inline static void _slurm_rpc_dump_jobs(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_job_single(slurm_msg_t * msg); inline static void _slurm_rpc_dump_nodes(slurm_msg_t * msg); inline static void _slurm_rpc_dump_partitions(slurm_msg_t * msg); inline static void _slurm_rpc_epilog_complete(slurm_msg_t * msg); @@ -149,6 +150,10 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_dump_jobs(msg); slurm_free_job_info_request_msg(msg->data); break; + case REQUEST_JOB_INFO_SINGLE: + _slurm_rpc_dump_job_single(msg); + slurm_free_job_id_msg(msg->data); + break; case REQUEST_JOB_END_TIME: _slurm_rpc_end_time(msg); slurm_free_job_alloc_info_msg(msg->data); @@ -498,24 +503,47 @@ static int _make_step_cred(struct step_record *step_rec, slurm_cred_t *slurm_cred) { slurm_cred_arg_t cred_arg; + struct job_record* job_ptr = step_rec->job_ptr; - cred_arg.jobid = step_rec->job_ptr->job_id; + cred_arg.jobid = job_ptr->job_id; cred_arg.stepid = step_rec->step_id; - cred_arg.uid = step_rec->job_ptr->user_id; - cred_arg.job_mem = step_rec->job_ptr->details->job_min_memory; + cred_arg.uid = job_ptr->user_id; + cred_arg.job_mem = job_ptr->details->job_min_memory; cred_arg.task_mem = step_rec->mem_per_task; cred_arg.hostlist = step_rec->step_layout->node_list; - if(step_rec->job_ptr->details->shared == 0) - cred_arg.alloc_lps_cnt = 0; - else - cred_arg.alloc_lps_cnt = step_rec->job_ptr->alloc_lps_cnt; - if (cred_arg.alloc_lps_cnt > 0) { - cred_arg.alloc_lps = xmalloc(cred_arg.alloc_lps_cnt * + + cred_arg.alloc_lps_cnt = 
job_ptr->alloc_lps_cnt; + if ((cred_arg.alloc_lps_cnt > 0) && + bit_equal(job_ptr->node_bitmap, step_rec->step_node_bitmap)) { + cred_arg.alloc_lps = xmalloc(cred_arg.alloc_lps_cnt * sizeof(uint32_t)); - memcpy(cred_arg.alloc_lps, step_rec->job_ptr->alloc_lps, - cred_arg.alloc_lps_cnt*sizeof(uint32_t)); - } else + memcpy(cred_arg.alloc_lps, step_rec->job_ptr->alloc_lps, + cred_arg.alloc_lps_cnt*sizeof(uint32_t)); + } else if (cred_arg.alloc_lps_cnt > 0) { + /* Construct an array of allocated CPUs per node. + * Translate from array based upon job's allocation + * to array based upon nodes allocated to the step. */ + int i, job_inx = -1, step_inx = -1; + int job_inx_target = job_ptr->node_cnt; + cred_arg.alloc_lps = xmalloc(cred_arg.alloc_lps_cnt * + sizeof(uint32_t)); + for (i=0; i<node_record_count; i++) { + if (!bit_test(job_ptr->node_bitmap, i)) + continue; + job_inx++; + if (!bit_test(step_rec->step_node_bitmap, i)) + continue; + step_inx++; + cred_arg.alloc_lps[step_inx] = + job_ptr->alloc_lps[job_inx]; + if (job_inx == job_inx_target) + break; + } + cred_arg.alloc_lps_cnt = step_inx + 1; + } else { + error("No resources allocated to job %u", job_ptr->job_id); cred_arg.alloc_lps = NULL; + } *slurm_cred = slurm_cred_create(slurmctld_config.cred_ctx, &cred_arg); @@ -697,8 +725,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) g_slurm_auth_get_uid(msg->auth_cred, NULL)); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_jobs"); - debug2("_slurm_rpc_dump_jobs, size=%d %s", - dump_size, TIME_STR); +/* info("_slurm_rpc_dump_jobs, size=%d %s", */ +/* dump_size, TIME_STR); */ /* init response_msg structure */ slurm_msg_t_init(&response_msg); @@ -713,6 +741,44 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) } } +/* _slurm_rpc_dump_job_single - process RPC for one job's state information */ +static void _slurm_rpc_dump_job_single(slurm_msg_t * msg) +{ + DEF_TIMERS; + char *dump = NULL; + int dump_size, rc; + slurm_msg_t response_msg; + 
job_id_msg_t *job_info_request_msg = (job_id_msg_t *) msg->data; + /* Locks: Read config job, write node (for hiding) */ + slurmctld_lock_t job_read_lock = { + READ_LOCK, READ_LOCK, NO_LOCK, WRITE_LOCK }; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + + START_TIMER; + debug2("Processing RPC: REQUEST_JOB_INFO_SINGLE from uid=%u", + (unsigned int) uid); + lock_slurmctld(job_read_lock); + + rc = pack_one_job(&dump, &dump_size, job_info_request_msg->job_id, + g_slurm_auth_get_uid(msg->auth_cred, NULL)); + unlock_slurmctld(job_read_lock); + END_TIMER2("_slurm_rpc_dump_job_single"); +/* info("_slurm_rpc_dump_job_single, size=%d %s",dump_size, TIME_STR); */ + + /* init response_msg structure */ + if (rc != SLURM_SUCCESS) { + slurm_send_rc_msg(msg, rc); + } else { + slurm_msg_t_init(&response_msg); + response_msg.address = msg->address; + response_msg.msg_type = RESPONSE_JOB_INFO; + response_msg.data = dump; + response_msg.data_size = dump_size; + slurm_send_node_msg(msg->conn_fd, &response_msg); + } + xfree(dump); +} + /* _slurm_rpc_end_time - Process RPC for job end time */ static void _slurm_rpc_end_time(slurm_msg_t * msg) { @@ -767,7 +833,8 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) (unsigned int) uid); lock_slurmctld(node_read_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if ((slurmctld_conf.private_data & PRIVATE_DATA_NODES) + && (!validate_super_user(uid))) { unlock_slurmctld(node_read_lock); error("Security violation, REQUEST_NODE_INFO RPC from uid=%d", uid); slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); @@ -816,7 +883,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) part_req_msg = (part_info_request_msg_t *) msg->data; lock_slurmctld(part_read_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if ((slurmctld_conf.private_data & PRIVATE_DATA_PARTITIONS) + && (!validate_super_user(uid))) { unlock_slurmctld(part_read_lock); debug2("Security violation, PARTITION_INFO RPC 
from uid=%d", uid); slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); @@ -862,7 +930,7 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) debug2("Processing RPC: MESSAGE_EPILOG_COMPLETE uid=%u", (unsigned int) uid); lock_slurmctld(job_write_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if (!validate_super_user(uid)) { unlock_slurmctld(job_write_lock); error("Security violation, EPILOG_COMPLETE RPC from uid=%u", (unsigned int) uid); @@ -2212,7 +2280,8 @@ static void _slurm_rpc_node_select_info(slurm_msg_t * msg) debug2("Processing RPC: REQUEST_NODE_SELECT_INFO from uid=%u", (unsigned int) uid); lock_slurmctld(config_read_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if ((slurmctld_conf.private_data & PRIVATE_DATA_NODES) + && (!validate_super_user(uid))) { error_code = ESLURM_USER_ID_MISSING; error("Security violation, NODE_SELECT_INFO RPC from uid=u", (unsigned int) uid); @@ -2606,7 +2675,7 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, launch_msg_ptr->uid = uid; launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); - if (make_batch_job_cred(launch_msg_ptr)) { + if (make_batch_job_cred(launch_msg_ptr, job_ptr)) { error("aborting batch step %u.%u", job_ptr->job_id, job_ptr->group_id); xfree(launch_msg_ptr->nodes); @@ -2861,10 +2930,13 @@ inline static void _slurm_rpc_accounting_update_msg(slurm_msg_t *msg) case ACCT_ADD_ASSOC: case ACCT_MODIFY_ASSOC: case ACCT_REMOVE_ASSOC: - rc = assoc_mgr_update_local_assocs( - object); + rc = assoc_mgr_update_local_assocs(object); break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: case ACCT_UPDATE_NOTSET: + rc = assoc_mgr_update_local_qos(object); + break; default: error("unknown type set in update_object: %d", object->type); diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 3bfa240f3..8ef6ed0d5 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -331,7 +331,8 @@ struct job_details { uint16_t 
ntasks_per_node; /* number of tasks on each node */ /* job constraints: */ uint32_t job_min_procs; /* minimum processors per node */ - uint32_t job_min_memory; /* minimum memory per node, MB */ + uint32_t job_min_memory; /* minimum memory per node (MB) OR + * memory per allocated CPU | MEM_PER_CPU */ uint32_t job_min_tmp_disk; /* minimum tempdisk per node, MB */ char *err; /* pathname of job's stderr file */ char *in; /* pathname of job's stdin file */ @@ -469,7 +470,7 @@ struct step_record { time_t pre_sus_time; /* time step ran prior to last suspend */ time_t tot_sus_time; /* total time in suspended state */ bitstr_t *step_node_bitmap; /* bitmap of nodes allocated to job - step */ + * step */ uint16_t port; /* port for srun communications */ char *host; /* host for srun communications */ uint16_t batch_step; /* 1 if batch job step, 0 otherwise */ @@ -485,9 +486,9 @@ struct step_record { uint32_t exit_code; /* highest exit code from any task */ bitstr_t *exit_node_bitmap; /* bitmap of exited nodes */ jobacctinfo_t *jobacct; /* keep track of process info in the - step */ + * step */ slurm_step_layout_t *step_layout;/* info about how tasks are laid out - in the step */ + * in the step */ }; extern List job_list; /* list of job_record entries */ @@ -1210,6 +1211,20 @@ extern void pack_job (struct job_record *dump_job_ptr, Buf buffer); */ extern void pack_part (struct part_record *part_ptr, Buf buffer); +/* + * pack_one_job - dump information for one jobs in + * machine independent form (for network transmission) + * OUT buffer_ptr - the pointer is set to the allocated buffer. 
+ * OUT buffer_size - set to size of the buffer in bytes + * IN job_id - ID of job that we want info for + * IN uid - uid of user making request (for partition filtering) + * NOTE: the buffer at *buffer_ptr must be xfreed by the caller + * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c + * whenever the data format changes + */ +extern int pack_one_job(char **buffer_ptr, int *buffer_size, + uint32_t job_id, uid_t uid); + /* part_filter_clear - Clear the partition's hidden flag based upon a user's * group access. This must follow a call to part_filter_set() */ extern void part_filter_clear(void); @@ -1219,7 +1234,7 @@ extern void part_filter_clear(void); extern void part_filter_set(uid_t uid); /* part_fini - free all memory associated with partition records */ -void part_fini (void); +extern void part_fini (void); /* * purge_old_job - purge old job records. diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c index 322c6dbb9..8bcfb519b 100644 --- a/src/slurmctld/srun_comm.c +++ b/src/slurmctld/srun_comm.c @@ -107,6 +107,28 @@ extern void srun_allocate (uint32_t job_id) } } +/* + * srun_allocate_abort - notify srun of a resource allocation failure + * IN job_id - id of the job allocated resource + */ +extern void srun_allocate_abort(struct job_record *job_ptr) +{ + if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node + && job_ptr->resp_host) { + slurm_addr * addr; + srun_job_complete_msg_t *msg_arg; + addr = xmalloc(sizeof(struct sockaddr_in)); + slurm_set_addr(addr, job_ptr->alloc_resp_port, + job_ptr->resp_host); + msg_arg = xmalloc(sizeof(srun_timeout_msg_t)); + msg_arg->job_id = job_ptr->job_id; + msg_arg->step_id = NO_VAL; + _srun_agent_launch(addr, job_ptr->alloc_node, + SRUN_JOB_COMPLETE, + msg_arg); + } +} + /* * srun_node_fail - notify srun of a node's failure * IN job_id - id of job to notify diff --git a/src/slurmctld/srun_comm.h b/src/slurmctld/srun_comm.h index 6e796100b..7858e5a2b 100644 --- 
a/src/slurmctld/srun_comm.h +++ b/src/slurmctld/srun_comm.h @@ -49,6 +49,12 @@ */ extern void srun_allocate (uint32_t job_id); +/* + * srun_allocate_abort - notify srun of a resource allocation failure + * IN job_id - id of the job allocated resource + */ +extern void srun_allocate_abort(struct job_record *job_ptr); + /* * srun_exec - request that srun execute a specific command * and route it's output to stdout diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index cef575330..0fe8834bc 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * step_mgr.c - manage the job step information of slurm - * $Id: step_mgr.c 13858 2008-04-11 19:29:30Z jette $ + * $Id: step_mgr.c 14548 2008-07-17 22:00:36Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -1132,7 +1132,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, (job_ptr->part_ptr->hidden)) continue; - if (slurmctld_conf.private_data + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) continue; @@ -1156,7 +1156,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, (job_ptr->part_ptr) && (job_ptr->part_ptr->hidden)) job_ptr = NULL; - else if (slurmctld_conf.private_data + else if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) job_ptr = NULL; @@ -1180,7 +1180,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, && (job_ptr->part_ptr) && (job_ptr->part_ptr->hidden)) job_ptr = NULL; - else if (slurmctld_conf.private_data + else if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) job_ptr = NULL; diff --git a/src/slurmd/slurmd/Makefile.am b/src/slurmd/slurmd/Makefile.am index ec4cebd46..bab7c9b77 100644 --- a/src/slurmd/slurmd/Makefile.am +++ b/src/slurmd/slurmd/Makefile.am @@ -11,8 +11,7 @@ INCLUDES = -I$(top_srcdir) slurmd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o -ldl \ + $(top_builddir)/src/api/libslurm.o -ldl \ $(PLPA_LIBS) slurmd_test_LDADD = $(slurmd_LDADD) diff --git a/src/slurmd/slurmd/Makefile.in b/src/slurmd/slurmd/Makefile.in index d92d8a394..9a70905e6 100644 --- a/src/slurmd/slurmd/Makefile.in +++ b/src/slurmd/slurmd/Makefile.in @@ -81,16 +81,14 @@ am_slurmd_OBJECTS = $(am__objects_1) config.$(OBJEXT) slurmd_OBJECTS = $(am_slurmd_OBJECTS) am__DEPENDENCIES_1 = slurmd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o $(am__DEPENDENCIES_1) + $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) slurmd_LINK = $(LIBTOOL) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(slurmd_LDFLAGS) \ $(LDFLAGS) -o $@ am_slurmd_test_OBJECTS = $(am__objects_1) testconfig.$(OBJEXT) slurmd_test_OBJECTS = $(am_slurmd_test_OBJECTS) am__DEPENDENCIES_2 = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o $(am__DEPENDENCIES_1) + $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) slurmd_test_DEPENDENCIES = $(am__DEPENDENCIES_2) slurmd_test_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -283,8 +281,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) slurmd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o -ldl \ + $(top_builddir)/src/api/libslurm.o -ldl \ $(PLPA_LIBS) slurmd_test_LDADD = $(slurmd_LDADD) diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index dab7c08ee..d5314fced 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -612,46 +612,40 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, */ static int _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, - int tasks_to_launch, hostset_t *step_hset) + int node_id, hostset_t *step_hset) { slurm_cred_arg_t arg; hostset_t hset = NULL; - bool user_ok = _slurm_authorized_user(uid); + bool user_ok = _slurm_authorized_user(uid); + bool verified = true; int host_index = -1; int rc; slurm_cred_t cred = req->cred; uint32_t jobid = req->job_id; uint32_t stepid = req->job_step_id; - + int tasks_to_launch = req->tasks_to_launch[node_id]; /* * First call slurm_cred_verify() so that all valid * credentials are checked */ if ((rc = slurm_cred_verify(conf->vctx, cred, &arg)) < 0) { - if (!user_ok) { + verified = false; + if (!user_ok) return SLURM_ERROR; - } else { + else { debug("_check_job_credential slurm_cred_verify failed:" " %m, but 
continuing anyway."); } } - /* Overwrite any memory limits in the RPC with - * contents of the credential */ - req->job_mem = arg.job_mem; - req->task_mem = arg.task_mem; - - /* - * If uid is the slurm user id or root, do not bother - * performing validity check of the credential - */ - if (user_ok) { + /* If uid is the SlurmUser or root and the credential is bad, + * then do not attempt validating the credential */ + if (!verified) { *step_hset = NULL; if (rc >= 0) { if ((hset = hostset_create(arg.hostlist))) *step_hset = hset; - xfree(arg.hostlist); - xfree(arg.alloc_lps); + slurm_cred_free_args(&arg); } return SLURM_SUCCESS; } @@ -684,12 +678,11 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, } if ((arg.alloc_lps_cnt > 0) && (tasks_to_launch > 0)) { - host_index = hostset_find(hset, conf->node_name); - /* Left in here for debugging purposes */ #if(0) - if(host_index >= 0) + /* Left for debugging purposes */ + if (host_index >= 0) info(" cons_res %u alloc_lps_cnt %u " "task[%d] = %u = task_to_launch %d host %s ", arg.jobid, arg.alloc_lps_cnt, host_index, @@ -698,35 +691,53 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, #endif if (host_index < 0) { - error("job cr credential invalid host_index %d for job %u", - host_index, arg.jobid); + error("job cr credential invalid host_index %d for " + "job %u", host_index, arg.jobid); goto fail; } - + if (host_index > arg.alloc_lps_cnt) + error("host_index > alloc_lps_cnt in credential"); + else if (arg.alloc_lps[host_index] == 0) + error("cons_res: zero processors allocated to step"); if (tasks_to_launch > arg.alloc_lps[host_index]) { - error("cons_res: More than one tasks per logical " + /* This is expected with the --overcommit option */ + verbose("cons_res: More than one tasks per logical " "processor (%d > %u) on host [%u.%u %ld %s] ", tasks_to_launch, arg.alloc_lps[host_index], arg.jobid, arg.stepid, (long) arg.uid, arg.hostlist); - error(" cons_res: Use task/affinity plug-in 
to bind " + verbose("cons_res: Use task/affinity plug-in to bind " "the tasks to the allocated resources"); } } + /* Overwrite any memory limits in the RPC with contents of the + * memory limit within the credential. + * Reset the CPU count on this node to correct value. */ + if (arg.job_mem & MEM_PER_CPU) { + req->job_mem = arg.job_mem & (~MEM_PER_CPU); + if ((host_index >= 0) && (host_index < arg.alloc_lps_cnt) && + (arg.alloc_lps[host_index] > 0)) + req->job_mem *= arg.alloc_lps[host_index]; + } else + req->job_mem = arg.job_mem; + req->task_mem = arg.task_mem; /* Defunct */ + if ((host_index >= 0) && (host_index < arg.alloc_lps_cnt)) + req->cpus_allocated[node_id] = arg.alloc_lps[host_index]; +#if 0 + info("mem orig:%u cpus:%u limit:%u", + arg.job_mem, arg.alloc_lps[host_index], req->job_mem); +#endif + *step_hset = hset; - xfree(arg.hostlist); - arg.alloc_lps_cnt = 0; - xfree(arg.alloc_lps); + slurm_cred_free_args(&arg); return SLURM_SUCCESS; fail: if (hset) hostset_destroy(hset); *step_hset = NULL; - xfree(arg.hostlist); - arg.alloc_lps_cnt = 0; - xfree(arg.alloc_lps); + slurm_cred_free_args(&arg); slurm_seterrno_ret(ESLURMD_INVALID_JOB_CREDENTIAL); } @@ -767,8 +778,7 @@ _rpc_launch_tasks(slurm_msg_t *msg) req->job_step_id, req->uid, req->gid, host, port); first_job_run = !slurm_cred_jobid_cached(conf->vctx, req->job_id); - if (_check_job_credential(req, req_uid, req->tasks_to_launch[nodeid], - &step_hset) < 0) { + if (_check_job_credential(req, req_uid, nodeid, &step_hset) < 0) { errnum = errno; error("Invalid job credential from %ld@%s: %m", (long) req_uid, host); @@ -809,7 +819,9 @@ _rpc_launch_tasks(slurm_msg_t *msg) job_limits_ptr->job_id = req->job_id; list_append(job_limits_list, job_limits_ptr); } - job_limits_ptr->job_mem = req->job_mem; /* reset limit */ + /* reset memory limit based upon value calculated in + * _check_job_credential() above */ + job_limits_ptr->job_mem = req->job_mem; slurm_mutex_unlock(&job_limits_mutex); } @@ -920,6 +932,28 @@ 
_get_user_env(batch_job_launch_msg_t *req) xfree(pwd_buf); } +/* The RPC currently contains a memory size limit, but we load the + * value from the job credential to be certain it has not been + * altered by the user */ +static void +_set_batch_job_limits(slurm_msg_t *msg) +{ + slurm_cred_arg_t arg; + batch_job_launch_msg_t *req = (batch_job_launch_msg_t *)msg->data; + + if (slurm_cred_get_args(req->cred, &arg) != SLURM_SUCCESS) + return; + + if (arg.job_mem & MEM_PER_CPU) { + req->job_mem = arg.job_mem & (~MEM_PER_CPU); + if (arg.alloc_lps_cnt > 1) + req->job_mem *= arg.alloc_lps_cnt; + } else + req->job_mem = arg.job_mem; + + slurm_cred_free_args(&arg); +} + static void _rpc_batch_job(slurm_msg_t *msg) { @@ -987,6 +1021,8 @@ _rpc_batch_job(slurm_msg_t *msg) goto done; } } + _get_user_env(req); + _set_batch_job_limits(msg); /* Since job could have been killed while the prolog was * running (especially on BlueGene, which can take minutes @@ -998,7 +1034,6 @@ _rpc_batch_job(slurm_msg_t *msg) rc = ESLURMD_CREDENTIAL_REVOKED; /* job already ran */ goto done; } - _get_user_env(req); slurm_mutex_lock(&launch_mutex); if (req->step_id == SLURM_BATCH_SCRIPT) diff --git a/src/slurmd/slurmstepd/Makefile.am b/src/slurmd/slurmstepd/Makefile.am index 4e344a188..4c090b9bb 100644 --- a/src/slurmd/slurmstepd/Makefile.am +++ b/src/slurmd/slurmstepd/Makefile.am @@ -9,9 +9,7 @@ INCLUDES = -I$(top_srcdir) slurmstepd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libspank.la \ - $(top_builddir)/src/common/libcommon.o -ldl\ + $(top_builddir)/src/api/libslurm.o -ldl\ $(PLPA_LIBS) $(PAM_LIBS) $(UTIL_LIBS) slurmstepd_SOURCES = \ diff --git a/src/slurmd/slurmstepd/Makefile.in b/src/slurmd/slurmstepd/Makefile.in index 5bc200c7e..61bdc4cb7 100644 --- a/src/slurmd/slurmstepd/Makefile.in +++ b/src/slurmd/slurmstepd/Makefile.in @@ -81,9 +81,7 @@ am_slurmstepd_OBJECTS = slurmstepd.$(OBJEXT) mgr.$(OBJEXT) \ 
slurmstepd_OBJECTS = $(am_slurmstepd_OBJECTS) am__DEPENDENCIES_1 = slurmstepd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libspank.la \ - $(top_builddir)/src/common/libcommon.o $(am__DEPENDENCIES_1) \ + $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) slurmstepd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -276,9 +274,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) slurmstepd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libspank.la \ - $(top_builddir)/src/common/libcommon.o -ldl\ + $(top_builddir)/src/api/libslurm.o -ldl\ $(PLPA_LIBS) $(PAM_LIBS) $(UTIL_LIBS) slurmstepd_SOURCES = \ diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 62862620c..4b5ef4d48 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 14238 2008-06-11 21:54:28Z jette $ + * $Id: mgr.c 14504 2008-07-14 17:38:53Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -1801,7 +1801,7 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, break; } else if (rc == 0) { sleep(1); - if ((--max_wait) == 0) { + if ((--max_wait) <= 0) { killpg(cpid, SIGKILL); opt = 0; } @@ -1810,7 +1810,7 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, break; } } - /* Insure that all child processes get killed */ + /* Insure that all child processes get killed, one last time */ killpg(cpid, SIGKILL); slurm_container_signal(job->cont_id, SIGKILL); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index e00850f62..0f82a07c7 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.c - slurmd_job_t routines - * $Id: slurmstepd_job.c 13755 2008-04-01 19:12:53Z jette $ + * $Id: slurmstepd_job.c 14546 2008-07-17 21:03:59Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -213,6 +213,7 @@ job_create(launch_tasks_request_msg_t *msg) job->mem_bind_type = msg->mem_bind_type; job->mem_bind = xstrdup(msg->mem_bind); job->ckpt_path = xstrdup(msg->ckpt_path); + job->cpus_per_task = msg->cpus_per_task; job->env = _array_copy(msg->envc, msg->env); job->eio = eio_handle_create(); @@ -325,6 +326,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->state = SLURMSTEPD_STEP_STARTING; job->pwd = pwd; + job->cpus = msg->cpus_per_node[0]; job->ntasks = 1; job->nprocs = msg->nprocs; job->jobid = msg->job_id; @@ -364,7 +366,8 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->envtp->mem_bind_type = 0; job->envtp->mem_bind = NULL; job->envtp->ckpt_path = NULL; - + job->cpus_per_task = msg->cpus_per_node[0]; + srun = srun_info_create(NULL, NULL, NULL); list_append(job->sruns, (void *) srun); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 5b0a653ef..5c1419d65 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.h slurmd_job_t definition - * $Id: slurmstepd_job.h 13672 2008-03-19 23:10:58Z jette $ + * $Id: slurmstepd_job.h 14546 2008-07-17 21:03:59Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -116,6 +116,7 @@ typedef struct slurmd_job { uint32_t nprocs; /* total number of processes in current job */ uint32_t nodeid; /* relative position of this node in job */ uint32_t ntasks; /* number of tasks on *this* node */ + uint32_t cpus_per_task; /* number of cpus desired per task */ uint32_t debug; /* debug level for job slurmd */ uint32_t job_mem; /* MB of memory reserved for the job */ uint32_t task_mem; /* MB of memory reserved for each task */ diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 339ff6c4a..087e0bcc8 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -357,7 +357,7 @@ exec_task(slurmd_job_t *job, int i, int waitfd) if (job->multi_prog && task->argv[0]) { /* - * Normally the client (srun/slauch) expands the command name + * Normally the client (srun) expands the command name * to a fully qualified path, but in --multi-prog mode it * is left up to the server to search the PATH for the * executable. diff --git a/src/slurmdbd/Makefile.am b/src/slurmdbd/Makefile.am index c8c9e1613..e2f99b404 100644 --- a/src/slurmdbd/Makefile.am +++ b/src/slurmdbd/Makefile.am @@ -10,7 +10,7 @@ sbin_PROGRAMS = slurmdbd slurmdbd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libcommon.o -ldl + $(top_builddir)/src/api/libslurm.o -ldl slurmdbd_SOURCES = \ diff --git a/src/slurmdbd/Makefile.in b/src/slurmdbd/Makefile.in index 450be380c..4da8d7e0e 100644 --- a/src/slurmdbd/Makefile.in +++ b/src/slurmdbd/Makefile.in @@ -75,7 +75,7 @@ am_slurmdbd_OBJECTS = agent.$(OBJEXT) proc_req.$(OBJEXT) \ read_config.$(OBJEXT) rpc_mgr.$(OBJEXT) slurmdbd.$(OBJEXT) slurmdbd_OBJECTS = $(am_slurmdbd_OBJECTS) slurmdbd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libcommon.o + $(top_builddir)/src/api/libslurm.o slurmdbd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(slurmdbd_LDFLAGS) \ 
$(LDFLAGS) -o $@ @@ -268,7 +268,7 @@ CLEANFILES = core.* INCLUDES = -I$(top_srcdir) slurmdbd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libcommon.o -ldl + $(top_builddir)/src/api/libslurm.o -ldl slurmdbd_SOURCES = \ agent.c \ diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index 21952ea41..a56331026 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -55,18 +55,30 @@ static int _add_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _add_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _add_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _add_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _cluster_procs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); -static int _get_accounts(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_jobs_cond(void *db_conn, Buf in_buffer, Buf *out_buffer); +static int _get_accounts(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_assocs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_clusters(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_jobs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_jobs_cond(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_txn(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _get_usage(uint16_t type, void *db_conn, - Buf in_buffer, Buf *out_buffer); -static int _get_users(void *db_conn, Buf 
in_buffer, Buf *out_buffer); + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_users(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _flush_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static void *_init_conn(Buf in_buffer, Buf *out_buffer, uint32_t *uid); @@ -99,6 +111,8 @@ static int _remove_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _remove_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _remove_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _remove_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _roll_usage(void *db_conn, @@ -153,6 +167,9 @@ proc_req(void **db_conn, slurm_fd orig_fd, rc = _add_clusters(*db_conn, in_buffer, out_buffer, uid); break; + case DBD_ADD_QOS: + rc = _add_qos(*db_conn, in_buffer, out_buffer, uid); + break; case DBD_ADD_USERS: rc = _add_users(*db_conn, in_buffer, out_buffer, uid); break; @@ -161,27 +178,36 @@ proc_req(void **db_conn, slurm_fd orig_fd, in_buffer, out_buffer, uid); break; case DBD_GET_ACCOUNTS: - rc = _get_accounts(*db_conn, in_buffer, out_buffer); + rc = _get_accounts(*db_conn, + in_buffer, out_buffer, uid); break; case DBD_GET_ASSOCS: - rc = _get_assocs(*db_conn, in_buffer, out_buffer); + rc = _get_assocs(*db_conn, in_buffer, out_buffer, uid); break; case DBD_GET_ASSOC_USAGE: case DBD_GET_CLUSTER_USAGE: rc = _get_usage(msg_type, *db_conn, - in_buffer, out_buffer); + in_buffer, out_buffer, uid); break; case DBD_GET_CLUSTERS: - rc = _get_clusters(*db_conn, in_buffer, out_buffer); + rc = _get_clusters(*db_conn, + in_buffer, out_buffer, uid); break; case DBD_GET_JOBS: - rc = _get_jobs(*db_conn, in_buffer, out_buffer); + rc = _get_jobs(*db_conn, in_buffer, out_buffer, uid); break; case DBD_GET_JOBS_COND: - rc = _get_jobs_cond(*db_conn, in_buffer, out_buffer); + rc = _get_jobs_cond(*db_conn, + in_buffer, out_buffer, uid); + 
break; + case DBD_GET_QOS: + rc = _get_qos(*db_conn, in_buffer, out_buffer, uid); + break; + case DBD_GET_TXN: + rc = _get_txn(*db_conn, in_buffer, out_buffer, uid); break; case DBD_GET_USERS: - rc = _get_users(*db_conn, in_buffer, out_buffer); + rc = _get_users(*db_conn, in_buffer, out_buffer, uid); break; case DBD_FLUSH_JOBS: rc = _flush_jobs(*db_conn, in_buffer, out_buffer, uid); @@ -253,6 +279,9 @@ proc_req(void **db_conn, slurm_fd orig_fd, rc = _remove_clusters(*db_conn, in_buffer, out_buffer, uid); break; + case DBD_REMOVE_QOS: + rc = _remove_qos(*db_conn, in_buffer, out_buffer, uid); + break; case DBD_REMOVE_USERS: rc = _remove_users(*db_conn, in_buffer, out_buffer, uid); @@ -513,6 +542,42 @@ end_it: *out_buffer = make_dbd_rc_msg(rc, comment, DBD_ADD_CLUSTERS); return rc; } + +static int _add_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + int rc = SLURM_SUCCESS; + dbd_list_msg_t *get_msg = NULL; + char *comment = NULL; + + debug2("DBD_ADD_QOS: called"); + if(*uid != slurmdbd_conf->slurm_user_id + && (assoc_mgr_get_admin_level(db_conn, *uid) + < ACCT_ADMIN_SUPER_USER)) { + comment = "Your user doesn't have privilege to preform this action"; + error("%s", comment); + rc = ESLURM_ACCESS_DENIED; + goto end_it; + } + + if (slurmdbd_unpack_list_msg(DBD_ADD_QOS, &get_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_ADD_QOS message"; + error("%s", comment); + rc = SLURM_ERROR; + goto end_it; + } + + rc = acct_storage_g_add_qos(db_conn, *uid, get_msg->my_list); + if(rc != SLURM_SUCCESS) + comment = "Failed to add qos."; + +end_it: + slurmdbd_free_list_msg(get_msg); + *out_buffer = make_dbd_rc_msg(rc, comment, DBD_ADD_QOS); + return rc; +} + static int _add_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid) { @@ -596,7 +661,8 @@ end_it: return rc; } -static int _get_accounts(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_accounts(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t 
*uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -625,7 +691,8 @@ static int _get_accounts(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_assocs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -655,7 +722,8 @@ static int _get_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_clusters(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -685,7 +753,8 @@ static int _get_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_jobs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_get_jobs_msg_t *get_jobs_msg = NULL; dbd_list_msg_t list_msg; @@ -703,7 +772,12 @@ static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) } memset(&sacct_params, 0, sizeof(sacct_parameters_t)); - sacct_params.opt_cluster = get_jobs_msg->cluster_name; + if (get_jobs_msg->cluster_name) { + sacct_params.opt_cluster_list = list_create(NULL); + list_append(sacct_params.opt_cluster_list, + get_jobs_msg->cluster_name); + } + sacct_params.opt_uid = -1; if(get_jobs_msg->user) { struct passwd *pw = NULL; @@ -717,6 +791,8 @@ static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) &sacct_params); slurmdbd_free_get_jobs_msg(get_jobs_msg); + if(sacct_params.opt_cluster_list) + list_destroy(sacct_params.opt_cluster_list); *out_buffer = init_buf(1024); pack16((uint16_t) DBD_GOT_JOBS, *out_buffer); @@ -727,7 +803,8 @@ static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_jobs_cond(void *db_conn, Buf in_buffer, 
Buf *out_buffer) +static int _get_jobs_cond(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *cond_msg = NULL; dbd_list_msg_t list_msg; @@ -756,8 +833,66 @@ static int _get_jobs_cond(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } +static int _get_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + dbd_cond_msg_t *cond_msg = NULL; + dbd_list_msg_t list_msg; + char *comment = NULL; + + debug2("DBD_GET_QOS: called"); + if (slurmdbd_unpack_cond_msg(DBD_GET_QOS, &cond_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_GET_QOS message"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, comment, + DBD_GET_QOS); + return SLURM_ERROR; + } + + list_msg.my_list = acct_storage_g_get_qos(db_conn, cond_msg->cond); + slurmdbd_free_cond_msg(DBD_GET_QOS, cond_msg); + + *out_buffer = init_buf(1024); + pack16((uint16_t) DBD_GOT_QOS, *out_buffer); + slurmdbd_pack_list_msg(DBD_GOT_QOS, &list_msg, *out_buffer); + if(list_msg.my_list) + list_destroy(list_msg.my_list); + + return SLURM_SUCCESS; +} + +static int _get_txn(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + dbd_cond_msg_t *cond_msg = NULL; + dbd_list_msg_t list_msg; + char *comment = NULL; + + debug2("DBD_GET_TXN: called"); + if (slurmdbd_unpack_cond_msg(DBD_GET_TXN, &cond_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_GET_TXN message"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, comment, + DBD_GET_TXN); + return SLURM_ERROR; + } + + list_msg.my_list = acct_storage_g_get_txn(db_conn, cond_msg->cond); + slurmdbd_free_cond_msg(DBD_GET_TXN, cond_msg); + + *out_buffer = init_buf(1024); + pack16((uint16_t) DBD_GOT_TXN, *out_buffer); + slurmdbd_pack_list_msg(DBD_GOT_TXN, &list_msg, *out_buffer); + if(list_msg.my_list) + list_destroy(list_msg.my_list); + + return SLURM_SUCCESS; +} + static int _get_usage(uint16_t type, void *db_conn, - Buf in_buffer, 
Buf *out_buffer) + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_usage_msg_t *get_msg = NULL; dbd_usage_msg_t got_msg; @@ -813,7 +948,8 @@ static int _get_usage(uint16_t type, void *db_conn, return SLURM_SUCCESS; } -static int _get_users(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_users(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -890,10 +1026,13 @@ static void *_init_conn(Buf in_buffer, Buf *out_buffer, uint32_t *uid) rc = SLURM_ERROR; goto end_it; } - if (init_msg->version != SLURMDBD_VERSION) { + if ((init_msg->version < SLURMDBD_VERSION_MIN) || + (init_msg->version > SLURMDBD_VERSION)) { comment = "Incompatable RPC version"; - error("Incompatable RPC version (%d != %d)", - init_msg->version, SLURMDBD_VERSION); + error("Incompatable RPC version received " + "(%u not between %d and %d)", + init_msg->version, + SLURMDBD_VERSION_MIN, SLURMDBD_VERSION); goto end_it; } *uid = init_msg->uid; @@ -1725,6 +1864,67 @@ static int _remove_clusters(void *db_conn, return rc; } +static int _remove_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + int rc = SLURM_SUCCESS; + dbd_cond_msg_t *get_msg = NULL; + dbd_list_msg_t list_msg; + char *comment = NULL; + + debug2("DBD_REMOVE_QOS: called"); + + if(*uid != slurmdbd_conf->slurm_user_id + && assoc_mgr_get_admin_level(db_conn, *uid) + < ACCT_ADMIN_SUPER_USER) { + comment = "Your user doesn't have privilege to preform this action"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(ESLURM_ACCESS_DENIED, + comment, DBD_REMOVE_QOS); + + return ESLURM_ACCESS_DENIED; + } + + if (slurmdbd_unpack_cond_msg(DBD_REMOVE_QOS, &get_msg, + in_buffer) != SLURM_SUCCESS) { + comment = "Failed to unpack DBD_REMOVE_QOS message"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, + comment, DBD_REMOVE_QOS); + return SLURM_ERROR; + } + + if(!(list_msg.my_list = acct_storage_g_remove_qos( + db_conn, 
*uid, get_msg->cond))) { + if(errno == ESLURM_ACCESS_DENIED) { + comment = "Your user doesn't have privilege to preform this action"; + rc = ESLURM_ACCESS_DENIED; + } else if(errno == SLURM_ERROR) { + comment = "Something was wrong with your query"; + rc = SLURM_ERROR; + } else if(errno == SLURM_NO_CHANGE_IN_DATA) { + comment = "Request didn't affect anything"; + rc = SLURM_SUCCESS; + } else { + comment = "Unknown issue"; + rc = SLURM_ERROR; + } + error("%s", comment); + slurmdbd_free_cond_msg(DBD_REMOVE_QOS, get_msg); + *out_buffer = make_dbd_rc_msg(rc, comment, DBD_REMOVE_QOS); + return rc; + } + + slurmdbd_free_cond_msg(DBD_REMOVE_QOS, get_msg); + *out_buffer = init_buf(1024); + pack16((uint16_t) DBD_GOT_LIST, *out_buffer); + slurmdbd_pack_list_msg(DBD_GOT_LIST, &list_msg, *out_buffer); + if(list_msg.my_list) + list_destroy(list_msg.my_list); + + return rc; +} + static int _remove_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid) { diff --git a/src/smap/Makefile.am b/src/smap/Makefile.am index 2ec5094dc..24f328c3d 100644 --- a/src/smap/Makefile.am +++ b/src/smap/Makefile.am @@ -15,7 +15,7 @@ bin_PROGRAMS = smap smap_LDADD = \ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ - $(top_builddir)/src/api/libslurmhelper.la + $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = smap.h smap_SOURCES = smap.c \ diff --git a/src/smap/Makefile.in b/src/smap/Makefile.in index 06f6ec162..ac8923c35 100644 --- a/src/smap/Makefile.in +++ b/src/smap/Makefile.in @@ -88,7 +88,7 @@ am__EXTRA_smap_SOURCES_DIST = smap.h smap.c job_functions.c \ opts.c smap_OBJECTS = $(am_smap_OBJECTS) @HAVE_SOME_CURSES_TRUE@smap_DEPENDENCIES = $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurm.o smap_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ 
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(smap_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -282,7 +282,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) @HAVE_SOME_CURSES_TRUE@smap_LDADD = \ @HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurm.o -ldl @HAVE_SOME_CURSES_TRUE@noinst_HEADERS = smap.h @HAVE_SOME_CURSES_TRUE@smap_SOURCES = smap.c \ diff --git a/src/squeue/Makefile.am b/src/squeue/Makefile.am index 205a78bea..fb8f5d6ce 100644 --- a/src/squeue/Makefile.am +++ b/src/squeue/Makefile.am @@ -7,7 +7,7 @@ INCLUDES = -I$(top_srcdir) bin_PROGRAMS = squeue -squeue_LDADD = $(top_builddir)/src/api/libslurmhelper.la +squeue_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = squeue.h print.h squeue_SOURCES = squeue.c print.c opts.c sort.c diff --git a/src/squeue/Makefile.in b/src/squeue/Makefile.in index 1f178ae7d..1e87dec53 100644 --- a/src/squeue/Makefile.in +++ b/src/squeue/Makefile.in @@ -76,7 +76,7 @@ PROGRAMS = $(bin_PROGRAMS) am_squeue_OBJECTS = squeue.$(OBJEXT) print.$(OBJEXT) opts.$(OBJEXT) \ sort.$(OBJEXT) squeue_OBJECTS = $(am_squeue_OBJECTS) -squeue_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +squeue_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o squeue_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(squeue_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -267,7 +267,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) -squeue_LDADD = $(top_builddir)/src/api/libslurmhelper.la +squeue_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = squeue.h print.h squeue_SOURCES = squeue.c print.c opts.c sort.c squeue_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/squeue/print.c b/src/squeue/print.c index 
0985f5ed2..74b010636 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -892,6 +892,7 @@ int _print_job_min_memory(job_info_t * job, int width, bool right_justify, _print_str("MIN_MEMORY", width, right_justify, true); else { tmp_char[0] = '\0'; + job->job_min_memory &= (~MEM_PER_CPU); convert_num_unit((float)job->job_min_memory, min_mem, sizeof(min_mem), UNIT_NONE); strcat(tmp_char, min_mem); diff --git a/src/squeue/sort.c b/src/squeue/sort.c index 05c42f4f7..2a0bce637 100644 --- a/src/squeue/sort.c +++ b/src/squeue/sort.c @@ -455,6 +455,8 @@ static int _sort_job_by_min_memory(void *void1, void *void2) job_info_t *job1 = (job_info_t *) void1; job_info_t *job2 = (job_info_t *) void2; + job1->job_min_memory &= (~MEM_PER_CPU); + job2->job_min_memory &= (~MEM_PER_CPU); diff = job1->job_min_memory - job2->job_min_memory; if (reverse_order) diff --git a/src/squeue/squeue.c b/src/squeue/squeue.c index 12ab9db3b..0ab83b8f6 100644 --- a/src/squeue/squeue.c +++ b/src/squeue/squeue.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * squeue.c - Report jobs in the slurm system - * - * $Id: squeue.c 14165 2008-05-30 21:23:22Z jette $ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Joey Ekstrom <ekstrom1@llnl.gov>, * Morris Jette <jette1@llnl.gov>, et. al. 
@@ -133,28 +132,46 @@ _print_job ( void ) static job_info_msg_t * old_job_ptr = NULL, * new_job_ptr; int error_code; uint16_t show_flags = 0; + uint32_t job_id = 0; if (params.all_flag) show_flags |= SHOW_ALL; - + if (params.job_list && (list_count(params.job_list) == 1)) { + ListIterator iterator; + uint32_t *job_id_ptr; + iterator = list_iterator_create(params.job_list); + job_id_ptr = list_next(iterator); + job_id = *job_id_ptr; + list_iterator_destroy(iterator); + } + if (old_job_ptr) { - error_code = slurm_load_jobs (old_job_ptr->last_update, - &new_job_ptr, show_flags); + if (job_id) { + error_code = slurm_load_job(&new_job_ptr, job_id); + } else { + error_code = slurm_load_jobs(old_job_ptr->last_update, + &new_job_ptr, show_flags); + } if (error_code == SLURM_SUCCESS) slurm_free_job_info_msg( old_job_ptr ); else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) { error_code = SLURM_SUCCESS; new_job_ptr = old_job_ptr; } - } - else - error_code = slurm_load_jobs ((time_t) NULL, &new_job_ptr, + } else if (job_id) { + error_code = slurm_load_job(&new_job_ptr, job_id); + } else { + error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr, show_flags); + } + if (error_code) { slurm_perror ("slurm_load_jobs error"); return; } old_job_ptr = new_job_ptr; + if (job_id) + old_job_ptr->last_update = (time_t) 0; if (quiet_flag == -1) printf ("last_update_time=%ld\n", diff --git a/src/sreport/Makefile.am b/src/sreport/Makefile.am index c01f23057..fd6611ed7 100644 --- a/src/sreport/Makefile.am +++ b/src/sreport/Makefile.am @@ -1,6 +1,7 @@ # Makefile for sreport AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -15,12 +16,11 @@ sreport_SOURCES = \ common.c sreport_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ + $(top_builddir)/src/api/libslurm.o -ldl\ $(READLINE_LIBS) sreport_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) force: -$(convenience_libs) : force +$(sreport_LDADD) : force @cd 
`dirname $@` && $(MAKE) `basename $@` diff --git a/src/sreport/Makefile.in b/src/sreport/Makefile.in index b32e054f3..408e7d90b 100644 --- a/src/sreport/Makefile.in +++ b/src/sreport/Makefile.in @@ -75,8 +75,7 @@ am_sreport_OBJECTS = sreport.$(OBJEXT) cluster_reports.$(OBJEXT) \ user_reports.$(OBJEXT) common.$(OBJEXT) sreport_OBJECTS = $(am_sreport_OBJECTS) am__DEPENDENCIES_1 = -sreport_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la \ +sreport_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \ $(am__DEPENDENCIES_1) sreport_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sreport_LDFLAGS) \ @@ -266,6 +265,7 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) sreport_SOURCES = \ sreport.c sreport.h \ @@ -276,8 +276,7 @@ sreport_SOURCES = \ common.c sreport_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ + $(top_builddir)/src/api/libslurm.o -ldl\ $(READLINE_LIBS) sreport_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) @@ -483,6 +482,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) @@ -566,7 +566,7 @@ uninstall-am: uninstall-binPROGRAMS force: -$(convenience_libs) : force +$(sreport_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 85759e62f..8c39260dc 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -71,9 +71,12 @@ static int _set_cond(int *start, int argc, char *argv[], int end = 0; int local_cluster_flag = all_clusters_flag; + if(!cluster_cond->cluster_list) + cluster_cond->cluster_list = list_create(slurm_destroy_char); + for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { @@ -81,24 +84,23 @@ static int _set_cond(int *start, int argc, char *argv[], } else if(!end && !strncasecmp(argv[i], "all_clusters", 1)) { local_cluster_flag = 1; continue; - } else if(!end) { - addto_char_list(cluster_cond->cluster_list, argv[i]); + } else if(!end + || !strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(cluster_cond->cluster_list, + argv[i]); set = 1; - } else if (strncasecmp (argv[i], "End", 1) == 0) { + } else if (!strncasecmp (argv[i], "End", 1)) { cluster_cond->usage_end = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(cluster_cond->cluster_list, - argv[i]+end); - set = 1; - } else if (strncasecmp (argv[i], "Start", 1) == 0) { + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Start", 1)) { cluster_cond->usage_start = parse_time(argv[i]+end); set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr," Unknown condition: %s\n" "Use keyword set to modify value\n", argv[i]); } } @@ -123,7 +125,9 @@ static int _setup_print_fields_list(List format_list) char *object = NULL; if(!format_list || !list_count(format_list)) { - 
printf(" error: we need a format list to set up the print.\n"); + exit_code=1; + fprintf(stderr, " we need a format list " + "to set up the print.\n"); return SLURM_ERROR; } @@ -192,7 +196,8 @@ static int _setup_print_fields_list(List format_list) field->len = 9; field->print_routine = sreport_print_time; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); xfree(field); continue; } @@ -211,14 +216,14 @@ static List _get_cluster_list(int argc, char *argv[], uint32_t *total_time, int i=0; List cluster_list = NULL; - cluster_cond->cluster_list = list_create(slurm_destroy_char); cluster_cond->with_usage = 1; _set_cond(&i, argc, argv, cluster_cond, format_list); cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); if(!cluster_list) { - printf(" Problem with cluster query.\n"); + exit_code=1; + fprintf(stderr, " Problem with cluster query.\n"); return NULL; } @@ -270,7 +275,7 @@ extern int cluster_utilization(int argc, char *argv[]) goto end_it; if(!list_count(format_list)) - addto_char_list(format_list, "Cl,a,d,i,res,rep"); + slurm_addto_char_list(format_list, "Cl,a,d,i,res,rep"); _setup_print_fields_list(format_list); list_destroy(format_list); @@ -311,48 +316,40 @@ extern int cluster_utilization(int argc, char *argv[]) while((field = list_next(itr2))) { switch(field->type) { case PRINT_CLUSTER_NAME: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, cluster->name); break; case PRINT_CLUSTER_CPUS: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.cpu_count); break; case PRINT_CLUSTER_ACPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.alloc_secs, total_reported); break; case PRINT_CLUSTER_DCPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.down_secs, total_reported); break; case PRINT_CLUSTER_ICPU: - 
field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.idle_secs, total_reported); break; case PRINT_CLUSTER_RCPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.resv_secs, total_reported); break; case PRINT_CLUSTER_OCPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.over_secs, total_reported); break; case PRINT_CLUSTER_TOTAL: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_reported, local_total_time); break; diff --git a/src/sreport/common.c b/src/sreport/common.c index 38fcfac37..9df97591b 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -39,68 +39,47 @@ #include "sreport.h" -extern void sreport_print_time(type_t type, print_field_t *field, +extern void sreport_print_time(print_field_t *field, uint64_t value, uint64_t total_time) { if(!total_time) total_time = 1; - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); - else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%-*s ", field->len, " "); - } else { - char *output = NULL; - double percent = (double)value; - - switch(time_format) { - case SREPORT_TIME_SECS: - output = xstrdup_printf("%llu", value); - break; - case SREPORT_TIME_PERCENT: - percent /= total_time; - percent *= 100; - output = xstrdup_printf("%.2lf%%", percent); - break; - case SREPORT_TIME_SECS_PER: - percent /= total_time; - percent *= 100; - output = 
xstrdup_printf("%llu(%.2lf%%)", - value, percent); - break; - default: - output = xstrdup_printf("%llu", value); - break; - } - - if(print_fields_parsable_print) - printf("%s|", output); - else - printf("%*s ", field->len, output); - xfree(output); + printf("|"); + else + printf("%-*s ", field->len, " "); + } else { + char *output = NULL; + double percent = (double)value; + + switch(time_format) { + case SREPORT_TIME_SECS: + output = xstrdup_printf("%llu", value); + break; + case SREPORT_TIME_PERCENT: + percent /= total_time; + percent *= 100; + output = xstrdup_printf("%.2lf%%", percent); + break; + case SREPORT_TIME_SECS_PER: + percent /= total_time; + percent *= 100; + output = xstrdup_printf("%llu(%.2lf%%)", + value, percent); + break; + default: + output = xstrdup_printf("%llu", value); + break; } - break; - default: + if(print_fields_parsable_print) - printf("%s|", "n/a"); + printf("%s|", output); else - printf("%-*.*s ", field->len, field->len, "n/a"); - break; + printf("%*s ", field->len, output); + xfree(output); } } @@ -156,9 +135,15 @@ extern void addto_char_list(List char_list, char *names) { int i=0, start=0; char *name = NULL, *tmp_char = NULL; - ListIterator itr = list_iterator_create(char_list); + ListIterator itr = NULL; + + if(!char_list) { + error("No list was given to fill in"); + return; + } - if(names && char_list) { + itr = list_iterator_create(char_list); + if(names) { if (names[i] == '\"' || names[i] == '\'') i++; start = i; diff --git a/src/sreport/job_reports.c b/src/sreport/job_reports.c index ff249fac0..654ca7b02 100644 --- a/src/sreport/job_reports.c +++ b/src/sreport/job_reports.c @@ -112,6 +112,109 @@ static void _destroy_cluster_grouping(void *object) } } +/* returns number of objects added to list */ +extern int _addto_uid_char_list(List char_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + 
error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + //info("got %s %d", name, i-start); + if (!isdigit((int) *name)) { + struct passwd *pwd; + if (!(pwd=getpwnam(name))) { + fprintf(stderr, + "Invalid user " + "id: %s\n", + name); + exit(1); + } + xfree(name); + name = xstrdup_printf( + "%d", pwd->pw_uid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. 
It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + if (!isdigit((int) *name)) { + struct passwd *pwd; + if (!(pwd=getpwnam(name))) { + fprintf(stderr, + "Invalid user id: %s\n", + name); + exit(1); + } + xfree(name); + name = xstrdup_printf("%d", pwd->pw_uid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} + static int _set_cond(int *start, int argc, char *argv[], acct_job_cond_t *job_cond, List format_list, List grouping_list) @@ -121,9 +224,12 @@ static int _set_cond(int *start, int argc, char *argv[], int end = 0; int local_cluster_flag = all_clusters_flag; + if(!job_cond->cluster_list) + job_cond->cluster_list = list_create(slurm_destroy_char); + for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { @@ -131,38 +237,50 @@ static int _set_cond(int *start, int argc, char *argv[], } else if(!end && !strncasecmp(argv[i], "all_clusters", 1)) { local_cluster_flag = 1; continue; - } else if(!end) { - addto_char_list(job_cond->cluster_list, argv[i]); + } else if(!end || !strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(job_cond->cluster_list, argv[i]); set = 1; - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(job_cond->acct_list, + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + if(!job_cond->acct_list) + job_cond->acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->acct_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Associations", 2) == 0) { - addto_char_list(job_cond->associd_list, + } else if 
(!strncasecmp (argv[i], "Associations", 2)) { + if(!job_cond->associd_list) + job_cond->associd_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->associd_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(job_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(job_cond->cluster_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "End", 1) == 0) { + } else if (!strncasecmp (argv[i], "End", 1)) { job_cond->usage_end = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Gid", 2) == 0) { - addto_char_list(job_cond->groupid_list, + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Gid", 2)) { + if(!job_cond->groupid_list) + job_cond->groupid_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->groupid_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "grouping", 2) == 0) { + } else if (!strncasecmp (argv[i], "grouping", 2)) { if(grouping_list) - addto_char_list(grouping_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Jobs", 1) == 0) { + slurm_addto_char_list(grouping_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Jobs", 1)) { char *end_char = NULL, *start_char = argv[i]+end; jobacct_selected_step_t *selected_step = NULL; char *dot = NULL; + if(!job_cond->step_list) + job_cond->step_list = + list_create(slurm_destroy_char); while ((end_char = strstr(start_char, ",")) && start_char) { @@ -178,33 +296,36 @@ static int _set_cond(int *start, int argc, char *argv[], dot = strstr(start_char, "."); if (dot == NULL) { debug2("No jobstep requested"); - selected_step->step = NULL; - selected_step->stepid = - (uint32_t)NO_VAL; + selected_step->stepid = NO_VAL; } else { 
*dot++ = 0; - selected_step->step = xstrdup(dot); selected_step->stepid = atoi(dot); } - selected_step->job = xstrdup(start_char); selected_step->jobid = atoi(start_char); start_char = end_char + 1; } set = 1; - } else if (strncasecmp (argv[i], "Partitions", 1) == 0) { - addto_char_list(job_cond->partition_list, + } else if (!strncasecmp (argv[i], "Partitions", 1)) { + if(!job_cond->partition_list) + job_cond->partition_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->partition_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Start", 1) == 0) { + } else if (!strncasecmp (argv[i], "Start", 1)) { job_cond->usage_start = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(job_cond->user_list, - argv[i]+end); + } else if (!strncasecmp (argv[i], "Users", 1)) { + if(!job_cond->userid_list) + job_cond->userid_list = + list_create(slurm_destroy_char); + _addto_uid_char_list(job_cond->userid_list, + argv[i]+end); set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" "Use keyword set to modify value\n", argv[i]); } } @@ -230,7 +351,9 @@ static int _setup_print_fields_list(List format_list) char *object = NULL; if(!format_list || !list_count(format_list)) { - printf(" error: we need a format list to set up the print.\n"); + exit_code=1; + fprintf(stderr, + " We need a format list to set up the print.\n"); return SLURM_ERROR; } @@ -276,7 +399,8 @@ static int _setup_print_fields_list(List format_list) field->len = 9; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); xfree(field); continue; } @@ -296,8 +420,9 @@ static int _setup_grouping_print_fields_list(List grouping_list) uint32_t size = 0; if(!grouping_list || !list_count(grouping_list)) { - printf(" error: we need a grouping list to " - "set up the print.\n"); 
+ exit_code=1; + fprintf(stderr, " We need a grouping list to " + "set up the print.\n"); return SLURM_ERROR; } @@ -367,19 +492,12 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); - job_cond->acct_list = list_create(slurm_destroy_char); - job_cond->associd_list = list_create(slurm_destroy_char); - job_cond->cluster_list = list_create(slurm_destroy_char); - job_cond->groupid_list = list_create(slurm_destroy_char); - job_cond->partition_list = list_create(slurm_destroy_char); - job_cond->step_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, job_cond, NULL, grouping_list); - addto_char_list(format_list, "Cl,a"); + slurm_addto_char_list(format_list, "Cl,a"); if(!list_count(grouping_list)) - addto_char_list(grouping_list, "50,250,500,1000"); + slurm_addto_char_list(grouping_list, "50,250,500,1000"); _setup_print_fields_list(format_list); list_destroy(format_list); @@ -388,7 +506,8 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) job_list = jobacct_storage_g_get_jobs_cond(db_conn, job_cond); if(!job_list) { - printf(" Problem with job query.\n"); + exit_code=1; + fprintf(stderr, " Problem with job query.\n"); goto end_it; } @@ -599,13 +718,11 @@ no_assocs: switch(field->type) { case PRINT_JOB_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, cluster_group->cluster); break; case PRINT_JOB_ACCOUNT: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, acct_group->acct); break; default: @@ -619,7 +736,6 @@ no_assocs: switch(field->type) { case PRINT_JOB_SIZE: field->print_routine( - SLURM_PRINT_VALUE, field, local_group->cpu_secs, acct_group->cpu_secs); @@ -630,8 +746,7 @@ no_assocs: } list_iterator_reset(itr2); list_iterator_destroy(local_itr); - total_field.print_routine(SLURM_PRINT_VALUE, - &total_field, + total_field.print_routine(&total_field, acct_group->cpu_secs, cluster_group->cpu_secs); diff --git a/src/sreport/sreport.h 
b/src/sreport/sreport.h index 2633a6cf3..d8f25640d 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -102,11 +102,10 @@ extern void *db_conn; extern uint32_t my_uid; extern int all_clusters_flag; -extern void sreport_print_time(type_t type, print_field_t *field, +extern void sreport_print_time(print_field_t *field, uint64_t value, uint64_t total_time); extern int parse_option_end(char *option); extern char *strip_quotes(char *option, int *increased); -extern void addto_char_list(List char_list, char *names); extern int set_start_end_time(time_t *start, time_t *end); #endif /* HAVE_SREPORT_H */ diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index fa1ff5340..a1496a48f 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -129,9 +129,6 @@ static int _set_cond(int *start, int argc, char *argv[], return SLURM_ERROR; } - if(!user_cond->user_list) - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->with_deleted = 1; user_cond->with_assocs = 1; if(!user_cond->assoc_cond) { @@ -140,14 +137,13 @@ static int _set_cond(int *start, int argc, char *argv[], user_cond->assoc_cond->with_usage = 1; } assoc_cond = user_cond->assoc_cond; - if(!assoc_cond->acct_list) - assoc_cond->acct_list = list_create(slurm_destroy_char); + if(!assoc_cond->cluster_list) assoc_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { @@ -157,32 +153,37 @@ static int _set_cond(int *start, int argc, char *argv[], continue; } else if (!end && !strncasecmp(argv[i], "group", 1)) { group_accts = 1; - } else if(!end) { - addto_char_list(user_cond->user_list, argv[i]); + } else if(!end + || !strncasecmp (argv[i], "Users", 1)) { + if(!assoc_cond->user_list) + assoc_cond->user_list = + list_create(slurm_destroy_char); + 
slurm_addto_char_list(assoc_cond->user_list, + argv[i]); set = 1; - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(assoc_cond->acct_list, + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + if(!assoc_cond->acct_list) + assoc_cond->acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(assoc_cond->acct_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(assoc_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(assoc_cond->cluster_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "End", 1) == 0) { + } else if (!strncasecmp (argv[i], "End", 1)) { assoc_cond->usage_end = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Start", 1) == 0) { + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Start", 1)) { assoc_cond->usage_start = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(user_cond->user_list, - argv[i]+end); - set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" "Use keyword set to modify value\n", argv[i]); } } @@ -207,7 +208,9 @@ static int _setup_print_fields_list(List format_list) char *object = NULL; if(!format_list || !list_count(format_list)) { - printf(" error: we need a format list to set up the print.\n"); + exit_code=1; + fprintf(stderr, + " We need a format list to set up the print.\n"); return SLURM_ERROR; } @@ -246,7 +249,8 @@ static int _setup_print_fields_list(List format_list) field->len = 10; field->print_routine = sreport_print_time; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); 
xfree(field); continue; } @@ -284,14 +288,15 @@ extern int user_top(int argc, char *argv[]) _set_cond(&i, argc, argv, user_cond, format_list); if(!list_count(format_list)) - addto_char_list(format_list, "Cl,L,P,A,U"); + slurm_addto_char_list(format_list, "Cl,L,P,A,U"); _setup_print_fields_list(format_list); list_destroy(format_list); user_list = acct_storage_g_get_users(db_conn, user_cond); if(!user_list) { - printf(" Problem with user query.\n"); + exit_code=1; + fprintf(stderr, " Problem with user query.\n"); goto end_it; } @@ -443,20 +448,17 @@ extern int user_top(int argc, char *argv[]) } list_iterator_destroy(itr3); field->print_routine( - SLURM_PRINT_VALUE, field, tmp_char); xfree(tmp_char); break; case PRINT_USER_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, local_cluster->name); break; case PRINT_USER_LOGIN: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, local_user->name); break; case PRINT_USER_PROPER: @@ -468,13 +470,11 @@ extern int user_top(int argc, char *argv[]) tmp_char = pwd->pw_gecos; } - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, tmp_char); break; case PRINT_USER_USED: field->print_routine( - SLURM_PRINT_VALUE, field, local_user->cpu_secs, local_cluster->cpu_secs); diff --git a/src/srun/Makefile.am b/src/srun/Makefile.am index 80a846909..5809c715a 100644 --- a/src/srun/Makefile.am +++ b/src/srun/Makefile.am @@ -1,6 +1,7 @@ # AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -22,7 +23,8 @@ srun_SOURCES = \ multi_prog.c multi_prog.h \ srun.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = \ + $(top_builddir)/src/api/libslurm.o -ldl srun_LDADD = \ $(convenience_libs) diff --git a/src/srun/Makefile.in b/src/srun/Makefile.in index 4485b7d17..0f9dbabe6 100644 --- a/src/srun/Makefile.in +++ b/src/srun/Makefile.in @@ -75,7 +75,8 @@ am_srun_OBJECTS = srun.$(OBJEXT) opt.$(OBJEXT) srun_job.$(OBJEXT) \ 
allocate.$(OBJEXT) core-format.$(OBJEXT) multi_prog.$(OBJEXT) \ srun.wrapper.$(OBJEXT) srun_OBJECTS = $(am_srun_OBJECTS) -srun_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +srun_DEPENDENCIES = $(am__DEPENDENCIES_1) srun_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(srun_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -264,6 +265,7 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) srun_SOURCES = \ srun.c srun.h \ @@ -281,7 +283,9 @@ srun_SOURCES = \ multi_prog.c multi_prog.h \ srun.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = \ + $(top_builddir)/src/api/libslurm.o -ldl + srun_LDADD = \ $(convenience_libs) @@ -492,6 +496,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 130f55e1a..505f6b697 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.c - srun functions for managing node allocations - * $Id: allocate.c 14453 2008-07-08 20:26:18Z da $ + * $Id: allocate.c 14570 2008-07-18 22:06:26Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -80,7 +80,6 @@ static uint32_t pending_job_id = 0; * Static Prototypes */ static void _set_pending_job_id(uint32_t job_id); -static void _ignore_signal(int signo); static void _exit_on_signal(int signo); static void _signal_while_allocating(int signo); static void _intr_handler(int signo); @@ -101,11 +100,6 @@ static void _signal_while_allocating(int signo) } } -static void _ignore_signal(int signo) -{ - /* do nothing */ -} - static void _exit_on_signal(int signo) { exit_flag = true; @@ -114,7 +108,10 @@ static void _exit_on_signal(int signo) /* This typically signifies the job was cancelled by scancel */ static void _job_complete_handler(srun_job_complete_msg_t *msg) { - info("Force Terminated job"); + if((int)msg->step_id >= 0) + info("Force Terminated job %u.%u", msg->job_id, msg->step_id); + else + info("Force Terminated job %u", msg->job_id); } /* @@ -246,18 +243,24 @@ allocate_nodes(void) } xsignal(SIGHUP, _exit_on_signal); - xsignal(SIGINT, _ignore_signal); - xsignal(SIGQUIT, _ignore_signal); - xsignal(SIGPIPE, _ignore_signal); - xsignal(SIGTERM, _ignore_signal); - xsignal(SIGUSR1, _ignore_signal); - xsignal(SIGUSR2, _ignore_signal); + xsignal(SIGINT, ignore_signal); + xsignal(SIGQUIT, ignore_signal); + xsignal(SIGPIPE, ignore_signal); + xsignal(SIGTERM, ignore_signal); + xsignal(SIGUSR1, ignore_signal); + xsignal(SIGUSR2, ignore_signal); job_desc_msg_destroy(j); return resp; } +void +ignore_signal(int signo) +{ + /* do nothing */ +} + int cleanup_allocation() { @@ -450,7 +453,9 @@ job_desc_msg_create_from_opts () if (opt.job_min_threads != NO_VAL) j->job_min_threads = opt.job_min_threads; if (opt.job_min_memory != NO_VAL) - j->job_min_memory = opt.job_min_memory; + j->job_min_memory = opt.job_min_memory; + else if (opt.mem_per_cpu != NO_VAL) + j->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU; if (opt.job_min_tmp_disk != NO_VAL) j->job_min_tmp_disk = opt.job_min_tmp_disk; if (opt.overcommit) { @@ -511,8 +516,6 @@ create_job_step(srun_job_t *job) : 
(opt.nprocs*opt.cpus_per_task); job->ctx_params.relative = (uint16_t)opt.relative; - if (opt.task_mem != NO_VAL) - job->ctx_params.mem_per_task = (uint16_t)opt.task_mem; job->ctx_params.ckpt_interval = (uint16_t)opt.ckpt_interval; job->ctx_params.ckpt_path = opt.ckpt_path; job->ctx_params.exclusive = (uint16_t)opt.exclusive; diff --git a/src/srun/allocate.h b/src/srun/allocate.h index 91474c653..85672c552 100644 --- a/src/srun/allocate.h +++ b/src/srun/allocate.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.h - node allocation functions for srun - * $Id: allocate.h 13672 2008-03-19 23:10:58Z jette $ + * $Id: allocate.h 14570 2008-07-18 22:06:26Z da $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -60,6 +60,9 @@ slurmctld_comm_addr_t slurmctld_comm_addr; */ resource_allocation_response_msg_t * allocate_nodes(void); +/* dummy function to handle all signals we want to ignore */ +void ignore_signal(int signo); + /* clean up the msg thread polling for information from the controller */ int cleanup_allocation(); diff --git a/src/srun/opt.c b/src/srun/opt.c index 625c3ad46..94bfff45f 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -1,8 +1,8 @@ /*****************************************************************************\ * opt.c - options processing for srun - * $Id: opt.c 14420 2008-07-02 19:52:49Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona1@llnl.gov>, et. al. 
* LLNL-CODE-402394. @@ -153,7 +153,7 @@ #define LONG_OPT_NTASKSPERNODE 0x136 #define LONG_OPT_NTASKSPERSOCKET 0x137 #define LONG_OPT_NTASKSPERCORE 0x138 -#define LONG_OPT_TASK_MEM 0x13a +#define LONG_OPT_MEM_PER_CPU 0x13a #define LONG_OPT_HINT 0x13b #define LONG_OPT_BLRTS_IMAGE 0x140 #define LONG_OPT_LINUX_IMAGE 0x141 @@ -656,7 +656,7 @@ static void _opt_default() opt.job_min_cores = NO_VAL; opt.job_min_threads = NO_VAL; opt.job_min_memory = NO_VAL; - opt.task_mem = NO_VAL; + opt.mem_per_cpu = NO_VAL; opt.job_min_tmp_disk= NO_VAL; opt.hold = false; @@ -777,7 +777,6 @@ env_vars_t env_vars[] = { {"SLURM_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, {"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SLURM_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, -{"SLURM_TASK_MEM", OPT_INT, &opt.task_mem, NULL }, {"SLURM_NETWORK", OPT_STRING, &opt.network, NULL }, {NULL, 0, NULL, NULL} }; @@ -991,8 +990,9 @@ static void set_options(const int argc, char **argv) {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"job-mem", required_argument, 0, LONG_OPT_TASK_MEM}, - {"task-mem", required_argument, 0, LONG_OPT_TASK_MEM}, + {"job-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"task-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"hint", required_argument, 0, LONG_OPT_HINT}, {"mpi", required_argument, 0, LONG_OPT_MPI}, {"tmp", required_argument, 0, LONG_OPT_TMP}, @@ -1314,9 +1314,9 @@ static void set_options(const int argc, char **argv) exit(1); } break; - case LONG_OPT_TASK_MEM: - opt.task_mem = (int) str_to_bytes(optarg); - if (opt.task_mem < 0) { + case LONG_OPT_MEM_PER_CPU: + opt.mem_per_cpu = (int) str_to_bytes(optarg); + if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); exit(1); @@ -1626,15 +1626,11 @@ static void _opt_args(int argc, char **argv) set_options(argc, argv); - /* 
When CR with memory as a CR is enabled we need to assign - * adequate value or check the value to opt.mem */ - if ((opt.job_min_memory >= -1) && (opt.task_mem > 0)) { - if (opt.job_min_memory == -1) { - opt.job_min_memory = opt.task_mem; - } else if (opt.job_min_memory < opt.task_mem) { - info("mem < task-mem - resizing mem to be equal " - "to task-mem"); - opt.job_min_memory = opt.task_mem; + if ((opt.job_min_memory > -1) && (opt.mem_per_cpu > -1)) { + if (opt.job_min_memory < opt.mem_per_cpu) { + info("mem < mem-per-cpu - resizing mem to be equal " + "to mem-per-cpu"); + opt.job_min_memory = opt.mem_per_cpu; } } @@ -2030,19 +2026,6 @@ static bool _opt_verify(void) xfree(sched_name); } - if (opt.task_mem > 0) { - uint32_t max_mem = slurm_get_max_mem_per_task(); - if (max_mem && (opt.task_mem > max_mem)) { - info("WARNING: Reducing --task-mem to system maximum " - "of %u MB", max_mem); - opt.task_mem = max_mem; - } - } else { - uint32_t max_mem = slurm_get_def_mem_per_task(); - if (max_mem) - opt.task_mem = max_mem; - } - return verified; } @@ -2069,8 +2052,8 @@ static char *print_constraints() if (opt.job_min_memory > 0) xstrfmtcat(buf, "mem=%dM ", opt.job_min_memory); - if (opt.task_mem > 0) - xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem); + if (opt.mem_per_cpu > 0) + xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); if (opt.job_min_tmp_disk > 0) xstrfmtcat(buf, "tmp=%ld ", opt.job_min_tmp_disk); @@ -2223,7 +2206,7 @@ static void _usage(void) " [--kill-on-bad-exit] [--propagate[=rlimits] [--comment=name]\n" " [--cpu_bind=...] [--mem_bind=...] 
[--network=type]\n" " [--ntasks-per-node=n] [--ntasks-per-socket=n]\n" -" [--ntasks-per-core=n]\n" +" [--ntasks-per-core=n] [--mem-per-cpu=MB]\n" #ifdef HAVE_BG /* Blue gene specific options */ " [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [--reboot]\n" " [--blrts-image=path] [--linux-image=path]\n" @@ -2321,8 +2304,8 @@ static void _help(void) " --exclusive allocate nodes in exclusive mode when\n" " cpu consumable resource is enabled\n" " or don't share CPUs for job steps\n" -" --task-mem=MB maximum amount of real memory per task\n" -" required by the job.\n" +" --mem-per-cpu=MB maximum amount of real memory per allocated\n" +" CPU required by the job.\n" " --mem >= --job-mem if --mem is specified.\n" "\n" "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" diff --git a/src/srun/opt.h b/src/srun/opt.h index 54cdac36c..200042374 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing - * $Id: opt.h 13771 2008-04-02 20:03:47Z jette $ + * $Id: opt.h 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -170,7 +170,7 @@ typedef struct srun_options { int32_t job_min_cores; /* --mincores=n */ int32_t job_min_threads;/* --minthreads=n */ int32_t job_min_memory; /* --mem=n */ - int32_t task_mem; /* --task-mem=n */ + int32_t mem_per_cpu; /* --mem-per-cpu=n */ long job_min_tmp_disk; /* --tmp=n */ char *constraints; /* --constraints=, -C constraint*/ bool contiguous; /* --contiguous */ diff --git a/src/srun/srun.c b/src/srun/srun.c index c994af6b4..b02333867 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -120,6 +120,7 @@ static int _become_user (void); static int _call_spank_local_user (srun_job_t *job); static void _define_symbols(void); static void _handle_intr(); +static void _handle_pipe(int signo); static void _handle_signal(int signo); static void _print_job_information(resource_allocation_response_msg_t *resp); static void _pty_restore(void); @@ -229,6 +230,7 @@ int srun(int ac, char **av) exit(1); } } else if ((resp = existing_allocation())) { + job_id = resp->job_id; if (opt.alloc_nodelist == NULL) opt.alloc_nodelist = xstrdup(resp->node_list); @@ -409,7 +411,8 @@ int srun(int ac, char **av) if (opt.debugger_test) mpir_dump_proctable(); } else { - info("Job step aborted before step completely launched."); + info("Job step %u.%u aborted before step completely launched.", + job->jobid, job->stepid); } slurm_step_launch_wait_finish(job->step_ctx); @@ -1061,7 +1064,8 @@ static void _handle_intr() return; } - info("sending Ctrl-C to job"); + info("sending Ctrl-C to job %u.%u", + job->jobid, job->stepid); last_intr_sent = time(NULL); slurm_step_launch_fwd_signal(job->step_ctx, SIGINT); @@ -1072,6 +1076,16 @@ static void _handle_intr() } } +static void _handle_pipe(int signo) +{ + static int ending = 0; + + if(ending) + return; + ending = 1; + slurm_step_launch_abort(job->step_ctx); +} + static void _handle_signal(int signo) { debug2("got signal %d", signo); @@ -1085,7 +1099,11 @@ static void _handle_signal(int signo) /* continue with slurm_step_launch_abort 
*/ case SIGTERM: case SIGHUP: - job_force_termination(job); + /* No need to call job_force_termination here since we + * are ending the job now and we don't need to update the + * state. + */ + info ("forcing job termination"); slurm_step_launch_abort(job->step_ctx); break; /* case SIGTSTP: */ @@ -1104,7 +1122,7 @@ static int _setup_signals() { int sigarray[] = { SIGINT, SIGQUIT, /*SIGTSTP,*/ SIGCONT, SIGTERM, - SIGALRM, SIGUSR1, SIGUSR2, SIGPIPE, 0 + SIGALRM, SIGUSR1, SIGUSR2, /*SIGPIPE,*/ 0 }; int rc = SLURM_SUCCESS, i=0, signo; @@ -1113,6 +1131,10 @@ static int _setup_signals() while ((signo = sigarray[i++])) xsignal(signo, _handle_signal); + /* special case for SIGPIPE since we don't want to print stuff + * and get into a locked up state + */ + xsignal(SIGPIPE, _handle_pipe); return rc; } diff --git a/src/sstat/Makefile.am b/src/sstat/Makefile.am index cdb0a0c6e..41cf9df32 100644 --- a/src/sstat/Makefile.am +++ b/src/sstat/Makefile.am @@ -6,8 +6,8 @@ INCLUDES = -I$(top_srcdir) bin_PROGRAMS = sstat -sstat_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la +sstat_LDADD = $(top_builddir)/src/api/libslurm.o -ldl + noinst_HEADERS = sstat.c sstat_SOURCES = sstat.c process.c print.c options.c diff --git a/src/sstat/Makefile.in b/src/sstat/Makefile.in index d752d24b7..c06d4277d 100644 --- a/src/sstat/Makefile.in +++ b/src/sstat/Makefile.in @@ -75,8 +75,7 @@ PROGRAMS = $(bin_PROGRAMS) am_sstat_OBJECTS = sstat.$(OBJEXT) process.$(OBJEXT) print.$(OBJEXT) \ options.$(OBJEXT) sstat_OBJECTS = $(am_sstat_OBJECTS) -sstat_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la +sstat_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sstat_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sstat_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -267,9 +266,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES 
= -I$(top_srcdir) -sstat_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la - +sstat_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sstat.c sstat_SOURCES = sstat.c process.c print.c options.c sstat_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/sstat/options.c b/src/sstat/options.c index a1aafaa40..137604372 100644 --- a/src/sstat/options.c +++ b/src/sstat/options.c @@ -119,30 +119,120 @@ void _do_help(void) void _init_params() { - params.opt_cluster = NULL; /* --cluster */ - params.opt_completion = 0; /* --completion */ - params.opt_dump = 0; /* --dump */ - params.opt_dup = -1; /* --duplicates; +1 = explicitly set */ - params.opt_fdump = 0; /* --formattted_dump */ - params.opt_stat = 0; /* --stat */ - params.opt_gid = -1; /* --gid (-1=wildcard, 0=root) */ - params.opt_header = 1; /* can only be cleared */ - params.opt_help = 0; /* --help */ - params.opt_long = 0; /* --long */ - params.opt_lowmem = 0; /* --low_memory */ - params.opt_purge = 0; /* --purge */ - params.opt_total = 0; /* --total */ - params.opt_uid = -1; /* --uid (-1=wildcard, 0=root) */ - params.opt_uid_set = 0; - params.opt_verbose = 0; /* --verbose */ - params.opt_expire_timespec = NULL; /* --expire= */ - params.opt_field_list = NULL; /* --fields= */ - params.opt_filein = NULL; /* --file */ - params.opt_job_list = NULL; /* --jobs */ - params.opt_partition_list = NULL;/* --partitions */ - params.opt_state_list = NULL; /* --states */ + memset(&params, 0, sizeof(sacct_parameters_t)); } +/* returns number of objects added to list */ +static int _addto_job_list(List job_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *dot = NULL; + jobacct_selected_step_t *selected_step = NULL; + jobacct_selected_step_t *curr_step = NULL; + + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!job_list) { + error("No list was given to fill in"); + return 0; + } + + itr = 
list_iterator_create(job_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + char *dot = NULL; + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + + selected_step = xmalloc( + sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = + atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid + == selected_step->jobid) + && (curr_step->stepid + == selected_step-> + stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, + selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + list_iterator_reset(itr); + } + i++; + start = i; + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + selected_step = + xmalloc(sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid == selected_step->jobid) + && (curr_step->stepid + == selected_step->stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + } + } + list_iterator_destroy(itr); + return count; +} + int decode_state_char(char *state) { if (!strcasecmp(state, "p")) @@ -165,14 +255,13 @@ int decode_state_char(char *state) 
return -1; // unknown } -void parse_command_line(int argc, char **argv, List selected_steps) +void parse_command_line(int argc, char **argv) { extern int optind; int c, i, optionIndex = 0; char *end = NULL, *start = NULL; jobacct_selected_step_t *selected_step = NULL; ListIterator itr = NULL; - char *dot = NULL; log_options_t logopt = LOG_OPTS_STDERR_ONLY; static struct option long_options[] = { @@ -181,7 +270,7 @@ void parse_command_line(int argc, char **argv, List selected_steps) {"help", 0, &params.opt_help, 1}, {"help-fields", 0, &params.opt_help, 2}, {"jobs", 1, 0, 'j'}, - {"noheader", 0, &params.opt_header, 0}, + {"noheader", 0, &params.opt_noheader, 1}, {"usage", 0, &params.opt_help, 3}, {"verbose", 0, 0, 'v'}, {"version", 0, 0, 'V'}, @@ -198,14 +287,11 @@ void parse_command_line(int argc, char **argv, List selected_steps) opterr = 1; /* Let getopt report problems to the user */ while (1) { /* now cycle through the command line */ - c = getopt_long(argc, argv, "C:F:hj:Vv", + c = getopt_long(argc, argv, "F:hj:Vv", long_options, &optionIndex); if (c == -1) break; switch (c) { - case 'C': - params.opt_cluster = xstrdup(optarg); - break; case 'F': if(params.opt_field_list) xfree(params.opt_field_list); @@ -229,13 +315,10 @@ void parse_command_line(int argc, char **argv, List selected_steps) optarg); exit(1); } - params.opt_job_list = - xrealloc(params.opt_job_list, - (params.opt_job_list==NULL? 0 : - strlen(params.opt_job_list)) + - strlen(optarg) + 1); - strcat(params.opt_job_list, optarg); - strcat(params.opt_job_list, ","); + if(!params.opt_job_list) + params.opt_job_list = list_create( + destroy_jobacct_selected_step); + _addto_job_list(params.opt_job_list, optarg); break; case 'v': /* Handle -vvv thusly... @@ -281,13 +364,10 @@ void parse_command_line(int argc, char **argv, List selected_steps) optarg); exit(1); } - params.opt_job_list = - xrealloc(params.opt_job_list, - (params.opt_job_list==NULL? 
0 : - strlen(params.opt_job_list)) + - strlen(optarg) + 1); - strcat(params.opt_job_list, optarg); - strcat(params.opt_job_list, ","); + if(!params.opt_job_list) + params.opt_job_list = list_create( + destroy_jobacct_selected_step); + _addto_job_list(params.opt_job_list, optarg); } if(!params.opt_field_list) { @@ -300,17 +380,13 @@ void parse_command_line(int argc, char **argv, List selected_steps) if (params.opt_verbose) { fprintf(stderr, "Options selected:\n" - "\topt_cluster=%s\n" "\topt_field_list=%s\n" - "\topt_header=%d\n" + "\topt_noheader=%d\n" "\topt_help=%d\n" - "\topt_job_list=%s\n" "\topt_verbose=%d\n", - params.opt_cluster, params.opt_field_list, - params.opt_header, + params.opt_noheader, params.opt_help, - params.opt_job_list, params.opt_verbose); logopt.stderr_level += params.opt_verbose; log_alter(logopt, 0, NULL); @@ -318,46 +394,20 @@ void parse_command_line(int argc, char **argv, List selected_steps) } /* specific jobs requested? */ - if (params.opt_job_list) { - start = params.opt_job_list; - while ((end = strstr(start, ",")) && start) { - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - selected_step = - xmalloc(sizeof(jobacct_selected_step_t)); - list_append(selected_steps, selected_step); - - dot = strstr(start, "."); - if (dot == NULL) { - debug2("No jobstep requested"); - selected_step->step = NULL; - selected_step->stepid = (uint32_t)NO_VAL; - } else { - *dot++ = 0; - selected_step->step = xstrdup(dot); - selected_step->stepid = atoi(dot); - } - selected_step->job = xstrdup(start); - selected_step->jobid = atoi(start); - start = end + 1; - } - if (params.opt_verbose) { - fprintf(stderr, "Jobs requested:\n"); - itr = list_iterator_create(selected_steps); - while((selected_step = list_next(itr))) { - if(selected_step->step) - fprintf(stderr, "\t: %s.%s\n", - selected_step->job, - selected_step->step); - else - fprintf(stderr, "\t: %s\n", - selected_step->job); - } - 
list_iterator_destroy(itr); + if (params.opt_verbose && params.opt_job_list + && list_count(params.opt_job_list)) { + fprintf(stderr, "Jobs requested:\n"); + itr = list_iterator_create(params.opt_job_list); + while((selected_step = list_next(itr))) { + if(selected_step->stepid != NO_VAL) + fprintf(stderr, "\t: %d.%d\n", + selected_step->jobid, + selected_step->stepid); + else + fprintf(stderr, "\t: %d\n", + selected_step->jobid); } + list_iterator_destroy(itr); } start = params.opt_field_list; diff --git a/src/sstat/sstat.c b/src/sstat/sstat.c index 62f246aeb..33c19cc51 100644 --- a/src/sstat/sstat.c +++ b/src/sstat/sstat.c @@ -64,16 +64,6 @@ jobacct_step_rec_t step; int printfields[MAX_PRINTFIELDS], /* Indexed into fields[] */ nprintfields = 0; -void _destroy_steps(void *object) -{ - jobacct_selected_step_t *step = (jobacct_selected_step_t *)object; - if(step) { - xfree(step->job); - xfree(step->step); - xfree(step); - } -} - void _print_header(void) { int i,j; @@ -239,28 +229,28 @@ int _do_stat(uint32_t jobid, uint32_t stepid) int main(int argc, char **argv) { ListIterator itr = NULL; - uint32_t jobid = 0; uint32_t stepid = 0; jobacct_selected_step_t *selected_step = NULL; - List selected_steps = list_create(_destroy_steps); - - parse_command_line(argc, argv, selected_steps); + parse_command_line(argc, argv); + if(!params.opt_job_list || !list_count(params.opt_job_list)) { + error("You didn't give me any jobs to stat."); + return 1; + } - if (params.opt_header) /* give them something to look */ + if (!params.opt_noheader) /* give them something to look */ _print_header();/* at while we think... 
*/ - itr = list_iterator_create(selected_steps); + itr = list_iterator_create(params.opt_job_list); while((selected_step = list_next(itr))) { - jobid = atoi(selected_step->job); - if(selected_step->step) - stepid = atoi(selected_step->step); + if(selected_step->stepid != NO_VAL) + stepid = selected_step->stepid; else stepid = 0; - _do_stat(jobid, stepid); + _do_stat(selected_step->jobid, stepid); } list_iterator_destroy(itr); - list_destroy(selected_steps); + list_destroy(params.opt_job_list); return 0; } diff --git a/src/sstat/sstat.h b/src/sstat/sstat.h index 94b971980..23e15f9fd 100644 --- a/src/sstat/sstat.h +++ b/src/sstat/sstat.h @@ -114,6 +114,6 @@ void print_vsize(type_t type, void *object); /* options.c */ -void parse_command_line(int argc, char **argv, List selected_steps); +void parse_command_line(int argc, char **argv); #endif /* !_SACCT_H */ diff --git a/src/strigger/Makefile.am b/src/strigger/Makefile.am index ca5cb43b4..0f8cebf34 100644 --- a/src/strigger/Makefile.am +++ b/src/strigger/Makefile.am @@ -6,7 +6,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = strigger -strigger_LDADD = $(top_builddir)/src/api/libslurmhelper.la +strigger_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = strigger.h strigger_SOURCES = strigger.c opts.c diff --git a/src/strigger/Makefile.in b/src/strigger/Makefile.in index 1755ca5fa..1e03e8571 100644 --- a/src/strigger/Makefile.in +++ b/src/strigger/Makefile.in @@ -75,7 +75,7 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_strigger_OBJECTS = strigger.$(OBJEXT) opts.$(OBJEXT) strigger_OBJECTS = $(am_strigger_OBJECTS) -strigger_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +strigger_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o strigger_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(strigger_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -266,7 +266,7 @@ top_builddir = @top_builddir@ 
top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) -strigger_LDADD = $(top_builddir)/src/api/libslurmhelper.la +strigger_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = strigger.h strigger_SOURCES = strigger.c opts.c strigger_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/sview/Makefile.am b/src/sview/Makefile.am index bf2e360c9..ed216cbb7 100644 --- a/src/sview/Makefile.am +++ b/src/sview/Makefile.am @@ -11,8 +11,7 @@ bin_PROGRAMS = sview sview_LDADD = \ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ - $(top_builddir)/src/api/libslurmhelper.la - + $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sview.h sview_SOURCES = sview.c popups.c grid.c part_info.c job_info.c \ diff --git a/src/sview/Makefile.in b/src/sview/Makefile.in index c46c9bfb4..5ddd6b9ab 100644 --- a/src/sview/Makefile.in +++ b/src/sview/Makefile.in @@ -91,7 +91,7 @@ am__EXTRA_sview_SOURCES_DIST = sview.h sview.c popups.c grid.c \ admin_info.c common.c sview_OBJECTS = $(am_sview_OBJECTS) @HAVE_GTK_TRUE@sview_DEPENDENCIES = $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurm.o sview_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(sview_CFLAGS) $(CFLAGS) $(sview_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -286,7 +286,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) @HAVE_GTK_TRUE@sview_LDADD = \ @HAVE_GTK_TRUE@ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurm.o -ldl @HAVE_GTK_TRUE@noinst_HEADERS = sview.h @HAVE_GTK_TRUE@sview_SOURCES = sview.c popups.c grid.c part_info.c job_info.c \ diff --git a/src/sview/popups.c 
b/src/sview/popups.c index b23f53192..6415cc5c0 100644 --- a/src/sview/popups.c +++ b/src/sview/popups.c @@ -214,7 +214,7 @@ static GtkTreeStore *_local_create_treestore_2cols(GtkWidget *popup, static void _layout_ctl_conf(GtkTreeStore *treestore, slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr) { - char temp_str[32]; + char temp_str[32], temp_str2[128]; int update = 0; GtkTreeIter iter; @@ -379,8 +379,9 @@ static void _layout_ctl_conf(GtkTreeStore *treestore, add_display_treestore_line(update, treestore, &iter, "PlugStackConfig", slurm_ctl_conf_ptr->plugstack); - snprintf(temp_str, sizeof(temp_str), "%u", - slurm_ctl_conf_ptr->private_data); + private_data_string(slurm_ctl_conf_ptr->private_data, + temp_str2, sizeof(temp_str2)); + snprintf(temp_str, sizeof(temp_str), "%s", temp_str2); add_display_treestore_line(update, treestore, &iter, "PrivateData", temp_str); diff --git a/src/sview/sview.h b/src/sview/sview.h index 9d70257ed..d55d5f26f 100644 --- a/src/sview/sview.h +++ b/src/sview/sview.h @@ -70,6 +70,7 @@ #include "src/common/macros.h" #include "src/plugins/select/bluegene/block_allocator/block_allocator.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" #include "src/plugins/select/bluegene/wrap_rm_api.h" diff --git a/testsuite/expect/README b/testsuite/expect/README index 102284a02..cfb33bea5 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -499,5 +499,8 @@ test21.1 sacctmgr --usage test21.2 sacctmgr --help test21.3 sacctmgr -V test21.4 sacctmgr version -test21.5 sacctmgr add, list, and delete a cluster -test21.6 sacctmgr add, list, and delete multiple cluster +test21.5 sacctmgr add a cluster +test21.6 sacctmgr add multiple clusters +test21.7 sacctmgr list clusters +test21.8 sacctmgr modify a cluster +test21.9 sacctmgr modify multiple clusters diff --git a/testsuite/expect/globals b/testsuite/expect/globals index cdb60c4ae..05204c455 100755 --- a/testsuite/expect/globals +++ 
b/testsuite/expect/globals @@ -978,3 +978,69 @@ proc is_super_user { } { log_user 1 return $found_user } + +################################################################ +# +# +# +################################################################ +proc check_acct_associations { } { + global sacctmgr number + + set rc 1 + set min -1 + set max -1 + log_user 0 + send_user "Testing Associations\n" + # + # Use sacctmgr to check associations + # + set s_pid [spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt] + expect { + -re "($number)\\|($number)\\|" { + # Here we are checking if we have duplicates and + # setting up an array to check for holes later + + set num1 $expect_out(1,string) + set num2 $expect_out(2,string) + set first [info exists found($num1)] + set sec [info exists found($num2)] + #send_user "$first=$num1 $sec=$num2\n"; + if { $first } { + send_user "FAILURE: found lft $num1 again\n" + set rc 0 + } elseif { $sec } { + send_user "FAILURE: found rgt $num2 again\n" + set rc 0 + } else { + set found($num1) 1 + set found($num2) 1 + if { $min == -1 || $min > $num1 } { + set min $num1 + } + if { $max == -1 || $max < $num2 } { + set max $num2 + } + } + exp_continue + } + timeout { + send_user "FAILURE: sacctmgr add not responding\n" + slow_kill $s_pid + set exit_code 1 + } + eof { + wait + } + } + + # Here we are checking for holes in the list from above + for {set inx $min} {$inx < $max} {incr inx} { + if { ![info exists found($inx)] } { + send_user "FAILURE: No index at $inx\n" + set rc 0 + } + } + log_user 1 + return $rc +} diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23 index ab47df669..2eb71e58c 100755 --- a/testsuite/expect/test1.23 +++ b/testsuite/expect/test1.23 @@ -108,7 +108,7 @@ set host_0 "" set timeout $max_job_delay set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname] expect { - -re "configuration is not available" { + -re "not available" { send_user "This error is expected, no worries\n" set err_msg 1 
exp_continue diff --git a/testsuite/expect/test1.35 b/testsuite/expect/test1.35 index 298c223fa..d20e2c836 100755 --- a/testsuite/expect/test1.35 +++ b/testsuite/expect/test1.35 @@ -121,9 +121,25 @@ if {[wait_for_file $file_out] == 0} { } } if {$step_cnt != $steps_started} { - send_user "\nFAILURE: not all steps reported by squeue\n" - send_user " Check $file_err for errors\n" - set exit_code 1 + set mem_err 0 + if {[wait_for_file $file_err] == 0} { + spawn $bin_cat $file_err + expect { + -re "memory limit" { + set mem_err 1 + } + eof { + wait + } + } + } + if {$mem_err == 1} { + send_user "\nWARNING: Test could not complete due to memory limit\n" + set step_cnt $steps_started + } else { + send_user "\nFAILURE: not all steps reported by squeue\n" + set exit_code 1 + } } if {$exit_code == 0} { diff --git a/testsuite/expect/test1.38 b/testsuite/expect/test1.38 index 074f9a67a..fd14cf077 100755 --- a/testsuite/expect/test1.38 +++ b/testsuite/expect/test1.38 @@ -8,7 +8,8 @@ # "FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. 
@@ -76,6 +77,11 @@ set matches 0 set job_id 0 set srun_pid [spawn $srun -v -N1 -t1 --unbuffered $file_in] expect { + -re "srun: task0: running" { + incr matches + cancel_job $job_id + exp_continue + } -re "launching ($number).0" { set job_id $expect_out(1,string) exp_continue @@ -85,15 +91,10 @@ expect { exec $bin_kill -INT $srun_pid exp_continue } - -re "srun: interrupt" { + -re "Force Terminated" { incr matches exp_continue } - -re "srun: task0: running" { - incr matches - cancel_job $job_id - exp_continue - } timeout { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid diff --git a/testsuite/expect/test1.49 b/testsuite/expect/test1.49 index fdbc7cfef..5c8f43fef 100755 --- a/testsuite/expect/test1.49 +++ b/testsuite/expect/test1.49 @@ -74,7 +74,7 @@ make_bash_script $file_in " # causes the test to fail # set matches 0 -set timeout $max_job_delay +set timeout [expr $max_job_delay + 30] if { [test_bluegene] } { set node_cnt 1-1024 } else { diff --git a/testsuite/expect/test1.7 b/testsuite/expect/test1.7 index 52b37442b..42efc6c93 100755 --- a/testsuite/expect/test1.7 +++ b/testsuite/expect/test1.7 @@ -95,7 +95,11 @@ set timeout [expr $max_job_delay + $sleep_time] set timed_out 0 set srun_pid [spawn $srun -t1 $bin_sleep $sleep_time] expect { - -re "time limit exceeded" { + -re "time limit" { + set timed_out 1 + exp_continue + } + -re "TIME LIMIT" { set timed_out 1 exp_continue } @@ -126,7 +130,11 @@ if {$timed_out == 1} { set completions 0 set srun_pid [spawn $srun -t4 $bin_sleep $sleep_time] expect { - -re "time limit exceeded" { + -re "time limit" { + set completions -1 + exp_continue + } + -re "TIME LIMIT" { set completions -1 exp_continue } diff --git a/testsuite/expect/test1.87 b/testsuite/expect/test1.87 index 18a53d453..b2ce6cca5 100755 --- a/testsuite/expect/test1.87 +++ b/testsuite/expect/test1.87 @@ -37,7 +37,7 @@ source ./globals set test_id "1.87" set exit_code 0 set file_in "test$test_id.input" -set prompt "SLURM_QA_PROMPT: " +set prompt 
"QA_PROMPT: " print_header $test_id diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91 index f0a2669ff..256b0d4ba 100755 --- a/testsuite/expect/test1.91 +++ b/testsuite/expect/test1.91 @@ -9,6 +9,7 @@ # anything else indicates a failure mode that must be investigated. ############################################################################ # Copyright (C) 2005-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. @@ -58,23 +59,39 @@ if {$affinity == 0} { send_user "\nWARNING: task affinity not supported on this system\n" exit 0 } + send_user "\ntask affinity plugin installed\n" -set num_sockets 0 -set num_cores 0 -set num_threads 0 +# Identify a usable node +set timeout $max_job_delay set node_name "" -log_user 0 - -# Here we need to get the last node since if we run this on heterogeneous -# systems the count can change - -spawn $scontrol show node +set srun_pid [spawn $srun -N1 --exclusive --verbose $bin_hostname] expect { - -re "NodeName=($alpha_numeric)" { + -re "on host ($alpha_numeric_under)" { set node_name $expect_out(1,string) exp_continue } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + set exit_code 1 + } + eof { + wait + } +} +if {[string compare $node_name ""] == 0} { + send_user "\nFAILURE: failed to get a usable node name\n" + exit 1 +} + +# Determine how many sockets, cores, and threads the node has +set num_sockets 0 +set num_cores 0 +set num_threads 0 +log_user 0 +spawn $scontrol show node $node_name +expect { -re "Sockets=($number)" { set num_sockets $expect_out(1,string) exp_continue diff --git a/testsuite/expect/test15.7 b/testsuite/expect/test15.7 index ee9a6e126..f928c5ba5 100755 --- a/testsuite/expect/test15.7 +++ b/testsuite/expect/test15.7 @@ -101,7 +101,7 @@ expect { } exp_continue } - -re 
"MinMemory=($number)" { + -re "MinMemoryNode=($number)" { set read_mem $expect_out(1,string) if {$read_mem == $mem_size} { incr matches diff --git a/testsuite/expect/test17.10 b/testsuite/expect/test17.10 index e50a538e2..d2b11f3a2 100755 --- a/testsuite/expect/test17.10 +++ b/testsuite/expect/test17.10 @@ -104,7 +104,7 @@ expect { } exp_continue } - -re "MinMemory=($number)" { + -re "MinMemoryNode=($number)" { set read_mem $expect_out(1,string) if {$read_mem == $mem_size} { incr matches diff --git a/testsuite/expect/test19.3 b/testsuite/expect/test19.3 index 41b700718..eb5c7f2a6 100755 --- a/testsuite/expect/test19.3 +++ b/testsuite/expect/test19.3 @@ -41,7 +41,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -52,9 +52,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.4 b/testsuite/expect/test19.4 index 789e4a9aa..cfe61d7ad 100755 --- a/testsuite/expect/test19.4 +++ b/testsuite/expect/test19.4 @@ -41,7 +41,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -52,9 +52,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.5 b/testsuite/expect/test19.5 index 609541be0..39f40b68f 100755 --- a/testsuite/expect/test19.5 +++ b/testsuite/expect/test19.5 @@ -44,7 +44,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -55,9 +55,12 @@ expect { wait } } 
-if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.6 b/testsuite/expect/test19.6 index b2d17013b..9e871ca97 100755 --- a/testsuite/expect/test19.6 +++ b/testsuite/expect/test19.6 @@ -45,7 +45,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -56,9 +56,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.7 b/testsuite/expect/test19.7 index 8dd3ddee3..d4f809814 100755 --- a/testsuite/expect/test19.7 +++ b/testsuite/expect/test19.7 @@ -42,7 +42,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -53,9 +53,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5 index cee55d8f8..60f8c6d93 100755 --- a/testsuite/expect/test21.5 +++ b/testsuite/expect/test21.5 @@ -1,7 +1,7 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# sacctmgr add, list, and delete a cluster +# sacctmgr add a cluster # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -33,16 +33,28 @@ source ./globals set test_id "21.5" set exit_code 0 -set amatches 0 -set lmatches 0 -set dmatches 0 +set amatches 0 +set lmatches 0 +set dmatches 0 set not_support 0 set add add set lis list set del delete set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall set clu cluster -set tc1 tCluster1 +set tc1 tcluster1 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 print_header $test_id @@ -50,57 +62,43 @@ print_header $test_id # # Use sacctmgr to create a cluster # -set sadd_pid [spawn $sacctmgr $add $clu $tc1] +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { -re "Adding Cluster" { incr amatches exp_continue } - -re "Name *= $tc1" { + -re "$nam *= $tc1" { incr amatches exp_continue } - -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + -re "User Defaults" { incr amatches exp_continue } - -re "\\\(N\\\/y\\\):" { + -re "$fs *= $fs1" { incr amatches - send "Y\r" exp_continue } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + -re "$mc *= $mc1" { + incr amatches + exp_continue } - eof { - wait + -re "$mj *= $mj1" { + incr amatches + exp_continue } -} - -if {$amatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" - set exit_code 1 -} - -# -# Use sacctmgr to list the addition of cluster -# -set slist_pid [spawn $sacctmgr $lis $clu $tc1] -expect { - -re "Name" { - incr lmatches + -re "$mn *= $mn1" { + incr amatches exp_continue } - -re "$tc1" { - incr lmatches + -re "$mw *= $mw1" { + incr amatches exp_continue - send_user "\nFound $tc1 in database\n" } timeout { - send_user "\nFAILURE: sacctmgr list not responding\n" - slow_kill $slist_pid + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid set exit_code 1 
} eof { @@ -108,15 +106,19 @@ expect { } } -if {$lmatches != 2} { - send_user "\nFAILURE: sacctmgr had a problem listing clusters\n" +if {$amatches != 8} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $amatches\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" set exit_code 1 } - # # Use sacctmgr to delete the test cluster # -set sadel_pid [spawn $sacctmgr $del $clu $tc1] +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] expect { -re "Deleting clusters" { incr dmatches @@ -126,15 +128,6 @@ expect { incr dmatches exp_continue } - -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { - incr dmatches - exp_continue - } - -re "\\\(N\\\/y\\\):" { - incr dmatches - send "Y\r" - exp_continue - } timeout { send_user "\nFAILURE: sacctmgr delete not responding\n" slow_kill $sadel_pid @@ -145,8 +138,13 @@ expect { } } -if {$dmatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" +if {$dmatches != 2} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" set exit_code 1 } diff --git a/testsuite/expect/test21.6 b/testsuite/expect/test21.6 index 8cf05b9e7..7ebf96df6 100755 --- a/testsuite/expect/test21.6 +++ b/testsuite/expect/test21.6 @@ -1,7 +1,7 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# sacctmgr add, list, and delete multiple clusters +# sacctmgr add multiple clusters # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -33,18 +33,30 @@ source ./globals set test_id "21.6" set exit_code 0 -set amatches 0 -set lmatches 0 -set dmatches 0 +set amatches 0 +set lmatches 0 +set dmatches 0 set not_support 0 set add add set lis list set del delete set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall set clu cluster -set tc1 tCluster1 -set tc2 tCluster2 -set tc3 tCluster3 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 print_header $test_id @@ -52,67 +64,60 @@ print_header $test_id # # Use sacctmgr to create a cluster # -set sadd_pid [spawn $sacctmgr $add $clu $tc1,$tc2,$tc3] +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { -re "Adding Cluster" { incr amatches exp_continue } - -re "Name *= $tc1" { + -re "$nam *= $tc1" { incr amatches exp_continue } - -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + -re "$nam *= $tc2" { incr amatches exp_continue } - -re "\\\(N\\\/y\\\):" { + -re "$nam *= $tc3" { incr amatches - send "Y\r" exp_continue } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + -re "User Defaults" { + incr amatches + exp_continue } - eof { - wait + -re "$fs *= $fs1" { + incr amatches + exp_continue } -} - -if {$amatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" - set exit_code 1 -} - -# -# Use sacctmgr to list the addition of cluster -# -set slist_pid [spawn $sacctmgr $lis $clu $tc1,$tc2,$tc3] -expect { - -re "Name" { - incr lmatches + -re "$mc *= $mc1" { + incr amatches exp_continue } - -re "$tc1" { - incr lmatches + -re "$mj *= $mj1" { + incr amatches exp_continue - send_user "\nFound $tc1 in database\n" } - -re "$tc2" { - incr lmatches + -re "$mn *= $mn1" { + incr amatches exp_continue - send_user "\nFound $tc1 in database\n" } - -re "$tc3" { - incr lmatches + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" exp_continue - send_user "\nFound $tc1 in database\n" } timeout { - send_user "\nFAILURE: sacctmgr list not responding\n" - slow_kill $slist_pid + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid set exit_code 1 } eof { @@ -120,8 +125,12 @@ expect { } } -if {$lmatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem listing clusters\n" +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" set exit_code 1 } @@ -161,6 +170,10 @@ if {$dmatches != 4} { send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" set exit_code 1 } +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} if {$exit_code == 0} { send_user "\nSUCCESS\n" diff --git a/testsuite/expect/test21.7 b/testsuite/expect/test21.7 new file mode 100755 index 000000000..622967c51 --- /dev/null +++ b/testsuite/expect/test21.7 @@ -0,0 +1,225 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr list clusters +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. 
+# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.7" +set exit_code 0 +set amatches 0 +set lmatches 0 +set dmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 + + +print_header $test_id + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "Adding Cluster" { + incr amatches + exp_continue + } + -re "$nam *= $tc1" { + incr amatches + exp_continue + } + -re "$nam *= $tc2" { + incr amatches + exp_continue + } + -re "$nam *= $tc3" { + incr amatches + exp_continue + } + -re "User Defaults" { + incr amatches + exp_continue + } + -re "$fs *= $fs1" { + incr amatches + exp_continue + } + -re "$mc *= $mc1" { + incr amatches + exp_continue + } + -re "$mj *= $mj1" { + incr amatches + exp_continue + } + -re "$mn *= $mn1" { + incr amatches + exp_continue + } + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re 
"Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the addition of cluster +# +set slist_pid [spawn $sacctmgr $lis $clu] +expect { + -re "Cluster" { + incr lmatches + exp_continue + } + -re "$tc1" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + -re "$tc2" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + -re "$tc3" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list not responding\n" + slow_kill $slist_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dmatches + exp_continue + } + -re "$tc1" { + incr dmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr dmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr dmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dmatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test21.8 b/testsuite/expect/test21.8 new file mode 100755 index 000000000..170044417 --- /dev/null +++ b/testsuite/expect/test21.8 @@ -0,0 +1,292 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify cluster +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.8" +set exit_code 0 +set amatches 0 +set mmatches 0 +set lmatches 0 +set dmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1375 +set mc1 1000000 +set mc2 200000 +set mj1 50 +set mj2 9 +set mn1 300 +set mn2 125 +set mw1 01:00:00 +set mw2 12:00:00 + + +print_header $test_id + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "Adding Cluster" { + incr amatches + exp_continue + } + -re "$nam *= $tc1" { + incr amatches + exp_continue + } + -re "$nam *= $tc2" { + incr amatches + exp_continue + } + -re "$nam *= $tc3" { + incr amatches + exp_continue + } + -re "User Defaults" { + incr amatches + exp_continue + } + -re "$fs *= $fs1" { + incr amatches + exp_continue + } + -re "$mc *= $mc1" { + incr amatches + exp_continue + } + -re "$mj *= $mj1" { + incr amatches + exp_continue + } + -re "$mn *= $mn1" { + incr amatches + exp_continue + } + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to modify one cluster +# +set smod_pid [spawn $sacctmgr $mod $clu set $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 where $nams=$tc2] +expect { + -re "Setting" { + incr mmatches + exp_continue + } + -re "User Defaults" { + incr mmatches + exp_continue + } + -re "$fs *= $fs2" { + incr mmatches + exp_continue + } + -re "$mc *= $mc2" { + incr mmatches + exp_continue + } + -re "$mj *= $mj2" { + incr mmatches + exp_continue + } + -re "$mn *= $mn2" { + incr mmatches + exp_continue + } + -re "$mw *= $mw2" { + incr mmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr mmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr mmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr modify not responding\n" + slow_kill $smod_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$mmatches != 9} { + send_user "\nFAILURE: sacctmgr had a problem modifying clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the addition of cluster +# +set slist_pid [spawn $sacctmgr $lis $clu] +expect { + -re "Cluster" { + incr lmatches + exp_continue + } + -re "$tc1" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + -re "$tc2" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + -re "$tc3" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list not responding\n" + slow_kill $slist_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dmatches + exp_continue + } + -re "$tc1" { + incr dmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr dmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr dmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dmatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9 new file mode 100755 index 000000000..12c0dd911 --- /dev/null +++ b/testsuite/expect/test21.9 @@ -0,0 +1,292 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify multiple clusters +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.9" +set exit_code 0 +set amatches 0 +set mmatches 0 +set lmatches 0 +set dmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1375 +set mc1 1000000 +set mc2 200000 +set mj1 50 +set mj2 9 +set mn1 300 +set mn2 125 +set mw1 01:00:00 +set mw2 12:00:00 + + +print_header $test_id + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "Adding Cluster" { + incr amatches + exp_continue + } + -re "$nam *= $tc1" { + incr amatches + exp_continue + } + -re "$nam *= $tc2" { + incr amatches + exp_continue + } + -re "$nam *= $tc3" { + incr amatches + exp_continue + } + -re "User Defaults" { + incr amatches + exp_continue + } + -re "$fs *= $fs1" { + incr amatches + exp_continue + } + -re "$mc *= $mc1" { + incr amatches + exp_continue + } + -re "$mj *= $mj1" { + incr amatches + exp_continue + } + -re "$mn *= $mn1" { + incr amatches + exp_continue + } + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to modify one cluster +# +set smod_pid [spawn $sacctmgr $mod $clu set $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 where $nams=$tc1,$tc2,$tc3] +expect { + -re "Setting" { + incr mmatches + exp_continue + } + -re "User Defaults" { + incr mmatches + exp_continue + } + -re "$fs *= $fs2" { + incr mmatches + exp_continue + } + -re "$mc *= $mc2" { + incr mmatches + exp_continue + } + -re "$mj *= $mj2" { + incr mmatches + exp_continue + } + -re "$mn *= $mn2" { + incr mmatches + exp_continue + } + -re "$mw *= $mw2" { + incr mmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr mmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr mmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr modify not responding\n" + slow_kill $smod_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$mmatches != 9} { + send_user "\nFAILURE: sacctmgr had a problem modifying clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the addition of cluster +# +set slist_pid [spawn $sacctmgr $lis $clu] +expect { + -re "Cluster" { + incr lmatches + exp_continue + } + -re "$tc1" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + -re "$tc2" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + -re "$tc3" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list not responding\n" + slow_kill $slist_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dmatches + exp_continue + } + -re "$tc1" { + incr dmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr dmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr dmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dmatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test5.6 b/testsuite/expect/test5.6 index b6b74ac73..7d2879d34 100755 --- a/testsuite/expect/test5.6 +++ b/testsuite/expect/test5.6 @@ -287,7 +287,7 @@ expect { wait } } -if {[string compare partition2 ""]} { +if {[string compare $partition2 ""] == 0} { set partition2 $partition1 } spawn $squeue --format=%P --noheader --partitions=$partition1 -- GitLab