diff --git a/META b/META index b59079a2bb91c9e6df3c9434bcb6e420f4652509..b498c1bb57fc3f53e34dd13113ef237efa055fef 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 5 + Micro: 6 Minor: 3 Name: slurm Release: 1 - Release_tags: dist - Version: 1.3.5 + Release_tags: + Version: 1.3.6 diff --git a/NEWS b/NEWS index bb862d7c5315fc133c2cdda3b6ad121c568862e3..da0360eb0fa3811152d8cdb44e67b28a67399864 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,35 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 1.3.6 +======================== + -- Add new function to get information for a single job rather than always + getting information for all jobs. Improved performance of some commands. + NOTE: This new RPC means that the slurmctld daemons should be updated + before or at the same time as the compute nodes in order to process it. + -- In salloc, sbatch, and srun replace --task-mem options with --mem-per-cpu + (--task-mem will continue to be accepted for now, but is not documented). + Replace DefMemPerTask and MaxMemPerTask with DefMemPerCPU, DefMemPerNode, + MaxMemPerCPU and MaxMemPerNode in slurm.conf (old options still accepted + for now, but mapped to "PerCPU" parameters and not documented). Allocate + a job's memory at the same time that processors are allocated based + upon the --mem or --mem-per-cpu option rather than when job steps are + initiated. + -- Altered QOS in accounting to be a list of admin defined states, an + account or user can have multiple QOS's now. They need to be defined using + 'sacctmgr add qos'. They are no longer an enum. If none are defined + Normal will be the QOS for everything. Right now this is only for use + with MOAB. Does nothing outside of that. + -- Added spank_get_item support for field S_STEP_CPUS_PER_TASK. + -- Make corrections in spank_get_item for field S_JOB_NCPUS, previously + reported task count rather than CPU count. + -- Convert configuration parameter PrivateData from on/off flag to have + separate flags for job, partition, and node data. See "man slurm.conf" + for details. + -- Fix bug, failed to load DisableRootJobs configuration parameter. + -- Altered sacctmgr to always return a non-zero exit code on error and send + error messages to stderr. + * Changes in SLURM 1.3.5 ======================== -- Fix processing of auth/munge authtentication key for messages originating @@ -13,8 +42,9 @@ documents those changes that are of interest to users and admins. -- Added more support for "dumping" account information to a flat file and read in again to protect data incase something bad happens to the database. -- Sacct will now report account names for job steps. - -- For AIX: Remove MP_POERESTART_ENV environment variable, disabling poerestart - command. User must explicitly set MP_POERESTART_ENV before executing poerestart. + -- For AIX: Remove MP_POERESTART_ENV environment variable, disabling + poerestart command. User must explicitly set MP_POERESTART_ENV before + executing poerestart. -- Put back notification that a job has been allocated resources when it was pending. @@ -373,21 +403,32 @@ documents those changes that are of interest to users and admins. Moved existing digital signature logic into new plugin: crypto/openssl. Added new support for crypto/munge (available with GPL license). 
+* Changes in SLURM 1.2.33 +========================= + -- Cancelled or Failed jobs will now report their job and step id on exit + -- Add SPANK items available to get: SLURM_VERSION, SLURM_VERSION_MAJOR, + SLURM_VERSION_MINOR and SLURM_VERSION_MICRO. + -- Fixed handling of SIGPIPE in srun. Abort job. + -- Fix bug introduced to MVAPICH plugin preventing use of TotalView debugger. + -- Modify slurmctld to get srun/salloc network address based upon the incoming + message rather than hostname set by the user command (backport of logic in + SLURM v1.3). + * Changes in SLURM 1.2.32 ========================= - -- Disable scancel of job in RootOnly partition only for sched/wiki2 (Moab). - Permit user scancel from other configurations (e.g. LSF). + -- LSF only: Enable scancel of job in RootOnly partition by the job's owner. -- Add support for sbatch --distribution and --network options. -- Correct pending job's wait reason to "Priority" rather than "Resources" if required resources are being held in reserve for a higher priority job. - -- In sched/wiki2 (Moab) report a node's state as "Drained" rather than "Draining" - if it has no allocated work (An undocumented Moab wiki option, see CRI - ticket #2394). + -- In sched/wiki2 (Moab) report a node's state as "Drained" rather than + "Draining" if it has no allocated work (An undocumented Moab wiki option, + see CRI ticket #2394). -- Log to job's output when it is cancelled or reaches it's time limit (ported from existing code in slurm v1.3). -- Add support in salloc and sbatch commands for --network option. -- Add support for user environment variables that include '\n' (e.g. bash functions). + -- Partial rewrite of mpi/mvapich plugin for improved scalability. * Changes in SLURM 1.2.31 ========================= @@ -3328,4 +3369,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. 
-$Id: NEWS 14453 2008-07-08 20:26:18Z da $ +$Id: NEWS 14583 2008-07-21 17:18:13Z da $ diff --git a/configure b/configure index fe472cf3a0e361b3137bba160dbf42f97c83dd9a..785c74f702f2d4a887874077603f070b2e7c103f 100755 --- a/configure +++ b/configure @@ -27196,7 +27196,7 @@ _ACEOF -ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 src/Makefile src/api/Makefile src/common/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/gold/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/jobcomp/slurmdbd/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile 
testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile testsuite/slurm_unit/api/manual/Makefile" +ac_config_files="$ac_config_files Makefile config.xml auxdir/Makefile contribs/Makefile contribs/perlapi/Makefile contribs/perlapi/libslurm-perl/Makefile.PL contribs/torque/Makefile contribs/phpext/Makefile contribs/phpext/slurm_php/config.m4 src/Makefile src/api/Makefile src/common/Makefile src/database/Makefile src/sacct/Makefile src/sacctmgr/Makefile src/sreport/Makefile src/sstat/Makefile src/salloc/Makefile src/sbatch/Makefile src/sattach/Makefile src/srun/Makefile src/slurmd/Makefile src/slurmd/slurmd/Makefile src/slurmd/slurmstepd/Makefile src/slurmdbd/Makefile src/slurmctld/Makefile src/sbcast/Makefile src/scontrol/Makefile src/scancel/Makefile src/squeue/Makefile src/sinfo/Makefile src/smap/Makefile src/strigger/Makefile src/sview/Makefile src/plugins/Makefile src/plugins/accounting_storage/Makefile src/plugins/accounting_storage/filetxt/Makefile src/plugins/accounting_storage/gold/Makefile src/plugins/accounting_storage/mysql/Makefile src/plugins/accounting_storage/pgsql/Makefile src/plugins/accounting_storage/none/Makefile src/plugins/accounting_storage/slurmdbd/Makefile src/plugins/auth/Makefile src/plugins/auth/authd/Makefile src/plugins/auth/munge/Makefile src/plugins/auth/none/Makefile src/plugins/checkpoint/Makefile src/plugins/checkpoint/aix/Makefile src/plugins/checkpoint/none/Makefile src/plugins/checkpoint/ompi/Makefile src/plugins/checkpoint/xlch/Makefile src/plugins/crypto/Makefile src/plugins/crypto/munge/Makefile src/plugins/crypto/openssl/Makefile src/plugins/jobacct_gather/Makefile src/plugins/jobacct_gather/linux/Makefile src/plugins/jobacct_gather/aix/Makefile src/plugins/jobacct_gather/none/Makefile src/plugins/jobcomp/Makefile src/plugins/jobcomp/filetxt/Makefile src/plugins/jobcomp/none/Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile src/plugins/proctrack/linuxproc/Makefile src/plugins/proctrack/rms/Makefile src/plugins/proctrack/sgi_job/Makefile src/plugins/sched/Makefile src/plugins/sched/backfill/Makefile src/plugins/sched/builtin/Makefile src/plugins/sched/gang/Makefile src/plugins/sched/hold/Makefile src/plugins/sched/wiki/Makefile src/plugins/sched/wiki2/Makefile src/plugins/select/Makefile src/plugins/select/bluegene/Makefile src/plugins/select/bluegene/block_allocator/Makefile src/plugins/select/bluegene/plugin/Makefile src/plugins/select/linear/Makefile src/plugins/select/cons_res/Makefile src/plugins/switch/Makefile src/plugins/switch/elan/Makefile src/plugins/switch/none/Makefile src/plugins/switch/federation/Makefile src/plugins/mpi/Makefile src/plugins/mpi/mpich1_p4/Makefile src/plugins/mpi/mpich1_shmem/Makefile src/plugins/mpi/mpichgm/Makefile src/plugins/mpi/mpichmx/Makefile src/plugins/mpi/mvapich/Makefile src/plugins/mpi/lam/Makefile src/plugins/mpi/none/Makefile src/plugins/mpi/openmpi/Makefile src/plugins/task/Makefile src/plugins/task/affinity/Makefile src/plugins/task/none/Makefile doc/Makefile doc/man/Makefile doc/html/Makefile doc/html/configurator.html testsuite/Makefile testsuite/expect/Makefile testsuite/slurm_unit/Makefile testsuite/slurm_unit/common/Makefile testsuite/slurm_unit/slurmctld/Makefile testsuite/slurm_unit/slurmd/Makefile testsuite/slurm_unit/api/Makefile 
testsuite/slurm_unit/api/manual/Makefile" cat >confcache <<\_ACEOF @@ -27976,7 +27976,6 @@ do "src/plugins/jobcomp/script/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/script/Makefile" ;; "src/plugins/jobcomp/mysql/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/mysql/Makefile" ;; "src/plugins/jobcomp/pgsql/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/pgsql/Makefile" ;; - "src/plugins/jobcomp/slurmdbd/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/jobcomp/slurmdbd/Makefile" ;; "src/plugins/proctrack/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/Makefile" ;; "src/plugins/proctrack/aix/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/aix/Makefile" ;; "src/plugins/proctrack/pgid/Makefile") CONFIG_FILES="$CONFIG_FILES src/plugins/proctrack/pgid/Makefile" ;; diff --git a/configure.ac b/configure.ac index 56782ee48f9a50d5de344517387a6da9616e91fc..d2c076132a00a198bc6c781c565155bc298211be 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -# $Id: configure.ac 14147 2008-05-28 22:46:41Z da $ +# $Id: configure.ac 14598 2008-07-21 20:12:45Z da $ # This file is to be processed with autoconf to generate a configure script dnl Prologue @@ -333,7 +333,6 @@ AC_CONFIG_FILES([Makefile src/plugins/jobcomp/script/Makefile src/plugins/jobcomp/mysql/Makefile src/plugins/jobcomp/pgsql/Makefile - src/plugins/jobcomp/slurmdbd/Makefile src/plugins/proctrack/Makefile src/plugins/proctrack/aix/Makefile src/plugins/proctrack/pgid/Makefile diff --git a/contribs/perlapi/libslurm-perl/msg.h b/contribs/perlapi/libslurm-perl/msg.h index 23c17b34e1077b57139a98b0788a41f557d2fccb..2f36413fff89b2788718d0a165a6c4d3c015a898 100644 --- a/contribs/perlapi/libslurm-perl/msg.h +++ b/contribs/perlapi/libslurm-perl/msg.h @@ -57,9 +57,12 @@ inline static int av_store_int(AV* av, int index, int val) */ inline static int hv_store_charp(HV* hv, const char *key, charp val) { - SV* sv = newSVpv(val, 0); - - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + SV* sv = NULL; + + if(val) + sv = newSVpv(val, 0); + + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -73,7 +76,7 @@ inline static int hv_store_uint32_t(HV* hv, const char *key, uint32_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -87,7 +90,7 @@ inline static int hv_store_uint16_t(HV* hv, const char *key, uint16_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -101,7 +104,7 @@ inline static int hv_store_uint8_t(HV* hv, const char *key, uint8_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -114,7 +117,7 @@ inline static int hv_store_int(HV* hv, const char *key, int val) { SV* sv = newSViv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -126,7 +129,7 @@ inline static int hv_store_int(HV* hv, const char *key, int val) */ inline static int hv_store_bool(HV* hv, const char *key, bool val) { - if (hv_store(hv, key, (I32)strlen(key), (val ? 
&PL_sv_yes : &PL_sv_no), 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), (val ? &PL_sv_yes : &PL_sv_no), 0) == NULL) { return -1; } return 0; @@ -139,7 +142,7 @@ inline static int hv_store_time_t(HV* hv, const char *key, time_t val) { SV* sv = newSVuv(val); - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { SvREFCNT_dec(sv); return -1; } @@ -151,7 +154,7 @@ inline static int hv_store_time_t(HV* hv, const char *key, time_t val) */ inline static int hv_store_sv(HV* hv, const char *key, SV* sv) { - if (hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { + if (!key || hv_store(hv, key, (I32)strlen(key), sv, 0) == NULL) { return -1; } return 0; diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index 6a2138ab877df2e5c61eeaccd9546c61976f68af..657db16057cc0c6164d5609fd5b8f01d57aa6c96 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -45,7 +45,7 @@ or SlurmDBD for added security.</li> </ul> <p>The use of sacct or sstat to view information about completed jobs -is dependent upon both JobAcctGatherType and JobAcctGatherType +is dependent upon both JobAcctGatherType and AccountingStorageType being configured to collect and store that information. The use of sreport is dependent upon some database being used to store that information.</p> @@ -59,7 +59,7 @@ pathname of the file (e.g. <i>JobCompLoc=/var/log/slurm/job_completions</i>). Use the <i>logrotate</i> or similar tool to prevent the log files from getting too large. -Send a SIGHUP signal to the <i>slurmctld</i> deaemon +Send a SIGHUP signal to the <i>slurmctld</i> daemon after moving the files, but before compressing them so that new log files will be created.</p> @@ -253,7 +253,7 @@ warning message.</li> The port number that the Slurm Database Daemon (slurmdbd) listens to for work. The default value is SLURMDBD_PORT as established at system build time. If none is explicitly specified, it will be set to 6819. -This value must be equal to the <i>SlurmDbdPort</i> parameter in the +This value must be equal to the <i>AccountingStoragePort</i> parameter in the slurm.conf file.</li> <li><b>LogFile</b>: @@ -566,4 +566,5 @@ completely. This is meant to clean up after typographic errors.</p> <p style="text-align: center;">Last modified 27 June 2008</p> -</ul></body></html> +<!--#include virtual="footer.txt"--> + diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in index 2b61e0922a9f68addf9f1bd8fc92f14701a3b418..a10f6f157f5db799d0efe9b5f677df1944a0062e 100644 --- a/doc/html/configurator.html.in +++ b/doc/html/configurator.html.in @@ -161,7 +161,7 @@ function displayfile() "MpiDefault=" + get_radio_value(document.config.mpi_default) + "<br>" + "#PluginDir= <br>" + "#PlugStackConfig= <br>" + - "#PrivateData=0 <br>" + + "#PrivateData=jobs <br>" + "ProctrackType=proctrack/" + get_radio_value(document.config.proctrack_type) + "<br>" + get_field("Prolog",document.config.prolog) + "<br>" + "#PropagatePrioProcess=0 <br>" + @@ -206,9 +206,9 @@ function displayfile() "# <br>" + "# <br>" + "# SCHEDULING <br>" + - "#DefMemPerTask=0 <br>" + + "#DefMemPerCPU=0 <br>" + "FastSchedule=" + get_radio_value(document.config.fast_schedule) + "<br>" + - "#MaxMemPerTask=0 <br>" + + "#MaxMemPerCPU=0 <br>" + "#SchedulerRootFilter=1 <br>" + "#SchedulerTimeSlice=30 <br>" + "SchedulerType=sched/" + get_radio_value(document.config.sched_type) + "<br>" + @@ -812,6 +812,6 @@ before terminating all remaining tasks. 
A value of zero indicates unlimited wait </FORM> <HR> <P class="footer">LLNL-WEB-402631<BR> -Last modified 1 April 2008</P> +Last modified 17 July 2008</P> </BODY> diff --git a/doc/html/cons_res.shtml b/doc/html/cons_res.shtml index ae4f0229330198d13b00876d6f270a0dad6f8747..368810a9ebc241aab3849330d9073e34b8b017e7 100644 --- a/doc/html/cons_res.shtml +++ b/doc/html/cons_res.shtml @@ -28,15 +28,15 @@ this plug-in is described below. slurm.conf (e.g. <i>SelectType=select/cons_res</i>).</li> <pre> # -# "SelectType" : node selection logic for scheduling. -# "select/bluegene" : the default on BlueGene systems, aware of -# system topology, manages bglblocks, etc. -# "select/cons_res" : allocate individual consumable resources -# (i.e. processors, memory, etc.) -# "select/linear" : the default on non-BlueGene systems, -# no topology awareness, oriented toward -# allocating nodes to jobs rather than -# resources within a node (e.g. CPUs) +# "SelectType" : node selection logic for scheduling. +# "select/bluegene" : the default on BlueGene systems, aware of +# system topology, manages bglblocks, etc. +# "select/cons_res" : allocate individual consumable resources +# (i.e. processors, memory, etc.) +# "select/linear" : the default on non-BlueGene systems, +# no topology awareness, oriented toward +# allocating nodes to jobs rather than +# resources within a node (e.g. CPUs) # # SelectType=select/linear SelectType=select/cons_res @@ -98,15 +98,15 @@ SelectType=select/cons_res SelectTypeParameter in the slurm.conf.</li> <pre> # -# "SelectType" : node selection logic for scheduling. -# "select/bluegene" : the default on BlueGene systems, aware of -# system topology, manages bglblocks, etc. -# "select/cons_res" : allocate individual consumable resources -# (i.e. processors, memory, etc.) -# "select/linear" : the default on non-BlueGene systems, -# no topology awareness, oriented toward -# allocating nodes to jobs rather than -# resources within a node (e.g. CPUs) +# "SelectType" : node selection logic for scheduling. +# "select/bluegene" : the default on BlueGene systems, aware of +# system topology, manages bglblocks, etc. +# "select/cons_res" : allocate individual consumable resources +# (i.e. processors, memory, etc.) +# "select/linear" : the default on non-BlueGene systems, +# no topology awareness, oriented toward +# allocating nodes to jobs rather than +# resources within a node (e.g. CPUs) # # SelectType=select/linear SelectType=select/cons_res @@ -115,34 +115,33 @@ SelectType=select/cons_res # - select/bluegene - this parameter is currently ignored # - select/linear - this parameter is currently ignored # - select/cons_res - the parameters available are -# - CR_CPU (1) - CPUs as consumable resources. -# No notion of sockets, cores, or threads. -# On a multi-core system CPUs will be cores -# On a multi-core/hyperthread system CPUs will -# be threads -# On a single-core systems CPUs are CPUs. ;-) -# - CR_Socket (2) - Sockets as a consumable resource. -# - CR_Core (3) - Cores as a consumable resource. -# (Not yet implemented) -# - CR_Memory (4) - Memory as a consumable resource. -# Note! CR_Memory assumes Shared=Yes -# - CR_Socket_Memory (5) - Socket and Memory as consumable -# resources. -# - CR_Core_Memory (6) - Core and Memory as consumable -# resources. (Not yet implemented) -# - CR_CPU_Memory (7) - CPU and Memory as consumable -# resources. +# - CR_CPU (1) - CPUs as consumable resources. +# No notion of sockets, cores, or threads. 
+# On a multi-core system CPUs will be cores +# On a multi-core/hyperthread system CPUs +# will be threads +# On a single-core systems CPUs are CPUs. +# - CR_Socket (2) - Sockets as a consumable resource. +# - CR_Core (3) - Cores as a consumable resource. +# - CR_Memory (4) - Memory as a consumable resource. +# Note! CR_Memory assumes Shared=Yes +# - CR_Socket_Memory (5) - Socket and Memory as consumable +# resources. +# - CR_Core_Memory (6) - Core and Memory as consumable +# resources. (Not yet implemented) +# - CR_CPU_Memory (7) - CPU and Memory as consumable +# resources. # # (#) refer to the output of "scontrol show config" # -# NB!: The -E extension for sockets, cores, and threads -# are ignored within the node allocation mechanism -# when CR_CPU or CR_CPU_MEMORY is selected. -# They are considered to compute the total number of -# tasks when -n is not specified +# NB!: The -E extension for sockets, cores, and threads +# are ignored within the node allocation mechanism +# when CR_CPU or CR_CPU_MEMORY is selected. +# They are considered to compute the total number of +# tasks when -n is not specified # # NB! All CR_s assume Shared=No or Shared=Force EXCEPT for -# CR_MEMORY which assumes Shared=Yes +# CR_MEMORY which assumes Shared=Yes # #SelectTypeParameters=CR_CPU (default) </pre> @@ -169,7 +168,7 @@ SelectType=select/cons_res way as when using the default node selection scheme.</li> <li>The <i>--exclusive</i> srun switch allows users to request nodes in exclusive mode even when consumable resources is enabled. see "man srun" - for details. </li> + for details. </li> <li>srun's <i>-s</i> or <i>--share</i> is incompatible with the consumable resource environment and will therefore not be honored. Since in this environment nodes are shared by default, <i>--exclusive</i> allows users to obtain dedicated nodes.</li> @@ -213,19 +212,18 @@ Please send comments and requests about the consumable resources to # srun sleep 100 & # srun sleep 100 & # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1132 allNodes sleep sballe R 0:05 1 hydra12 - 1133 allNodes sleep sballe R 0:04 1 hydra12 - 1134 allNodes sleep sballe R 0:02 1 hydra12 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1132 allNodes sleep sballe R 0:05 1 hydra12 + 1133 allNodes sleep sballe R 0:04 1 hydra12 + 1134 allNodes sleep sballe R 0:02 1 hydra12 # srun -N 2-2 -E 2:2 sleep 100 & srun: job 1135 queued and waiting for resources #squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1135 allNodes sleep sballe PD 0:00 2 (Resources) - 1132 allNodes sleep sballe R 0:24 1 hydra12 - 1133 allNodes sleep sballe R 0:23 1 hydra12 - 1134 allNodes sleep sballe R 0:21 1 hydra12 -# +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1135 allNodes sleep sballe PD 0:00 2 (Resources) + 1132 allNodes sleep sballe R 0:24 1 hydra12 + 1133 allNodes sleep sballe R 0:23 1 hydra12 + 1134 allNodes sleep sballe R 0:21 1 hydra12 </pre> <li><b>Proposed solution:</b> Enhance the selection mechanism to go through {node,socket,core,thread}-tuplets to find available match for specific request (bounded knapsack problem). </li> </ul> @@ -248,7 +246,7 @@ srun: job 1135 queued and waiting for resources <h2>Examples of CR_Memory, CR_Socket_Memory, and CR_CPU_Memory type consumable resources</h2> <pre> -sinfo -lNe +# sinfo -lNe NODELIST NODES PARTITION STATE CPUS S:C:T MEMORY hydra[12-16] 5 allNodes* ... 4 2:2:1 2007 </pre> @@ -256,59 +254,59 @@ hydra[12-16] 5 allNodes* ... 
4 2:2:1 2007 <p>Using select/cons_res plug-in with CR_Memory</p> <pre> Example: -srun -N 5 -n 20 --job-mem=1000 sleep 100 & <-- running -srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running -srun -N 5 -n 10 --job-mem=1000 sleep 100 & <-- queued and waiting for resources - -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1820 allNodes sleep sballe PD 0:00 5 (Resources) - 1818 allNodes sleep sballe R 0:17 5 hydra[12-16] - 1819 allNodes sleep sballe R 0:11 5 hydra[12-16] +# srun -N 5 -n 20 --job-mem=1000 sleep 100 & <-- running +# srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running +# srun -N 5 -n 10 --job-mem=1000 sleep 100 & <-- queued and waiting for resources + +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1820 allNodes sleep sballe PD 0:00 5 (Resources) + 1818 allNodes sleep sballe R 0:17 5 hydra[12-16] + 1819 allNodes sleep sballe R 0:11 5 hydra[12-16] </pre> <p>Using select/cons_res plug-in with CR_Socket_Memory (2 sockets/node)</p> <pre> Example 1: -srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running -srun -n 1 -w hydra12 --job-mem=2000 sleep 100 & <-- queued and waiting for resources +# srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running +# srun -n 1 -w hydra12 --job-mem=2000 sleep 100 & <-- queued and waiting for resources -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1890 allNodes sleep sballe PD 0:00 1 (Resources) - 1889 allNodes sleep sballe R 0:08 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1890 allNodes sleep sballe PD 0:00 1 (Resources) + 1889 allNodes sleep sballe R 0:08 5 hydra[12-16] Example 2: -srun -N 5 -n 10 --job-mem=10 sleep 100 & <-- running -srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resourcessqueue +# srun -N 5 -n 10 --job-mem=10 sleep 100 & <-- running +# srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resourcessqueue -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1831 allNodes sleep sballe PD 0:00 1 (Resources) - 1830 allNodes sleep sballe R 0:07 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1831 allNodes sleep sballe PD 0:00 1 (Resources) + 1830 allNodes sleep sballe R 0:07 5 hydra[12-16] </pre> <p>Using select/cons_res plug-in with CR_CPU_Memory (4 CPUs/node)</p> <pre> Example 1: -srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running -srun -N 5 -n 5 --job-mem=10 sleep 100 & <-- running -srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- queued and waiting for resources +# srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- running +# srun -N 5 -n 5 --job-mem=10 sleep 100 & <-- running +# srun -N 5 -n 5 --job-mem=1000 sleep 100 & <-- queued and waiting for resources -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1835 allNodes sleep sballe PD 0:00 5 (Resources) - 1833 allNodes sleep sballe R 0:10 5 hydra[12-16] - 1834 allNodes sleep sballe R 0:07 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1835 allNodes sleep sballe PD 0:00 5 (Resources) + 1833 allNodes sleep sballe R 0:10 5 hydra[12-16] + 1834 allNodes sleep sballe R 0:07 5 hydra[12-16] Example 2: -srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running -srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resources +# srun -N 5 -n 20 --job-mem=10 sleep 100 & <-- running +# srun -n 1 --job-mem=10 sleep 100 & <-- queued and waiting for resources -squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 1837 allNodes sleep sballe PD 0:00 1 
(Resources) - 1836 allNodes sleep sballe R 0:11 5 hydra[12-16] +# squeue +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 1837 allNodes sleep sballe PD 0:00 1 (Resources) + 1836 allNodes sleep sballe R 0:11 5 hydra[12-16] </pre> <p class="footer"><a href="#top">top</a></p> @@ -365,11 +363,11 @@ have one idle cpu and linux04 has 3 idle cpus.</p> <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 3 lsf sleep root PD 0:00 3 (Resources) - 4 lsf sleep root PD 0:00 1 (Resources) - 5 lsf sleep root PD 0:00 1 (Resources) - 2 lsf sleep root R 0:14 4 xc14n[13-16] +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 3 lsf sleep root PD 0:00 3 (Resources) + 4 lsf sleep root PD 0:00 1 (Resources) + 5 lsf sleep root PD 0:00 1 (Resources) + 2 lsf sleep root R 0:14 4 xc14n[13-16] </pre> <p>Once Job 2 is finished, Job 3 is scheduled and runs on @@ -381,10 +379,10 @@ and Job 4 can run concurrently on the cluster.</p> <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 5 lsf sleep root PD 0:00 1 (Resources) - 3 lsf sleep root R 0:11 3 xc14n[13-15] - 4 lsf sleep root R 0:11 1 xc14n16 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 5 lsf sleep root PD 0:00 1 (Resources) + 3 lsf sleep root R 0:11 3 xc14n[13-15] + 4 lsf sleep root R 0:11 1 xc14n16 </pre> <p>Once Job 3 finishes, Job 5 is allocated resources and can run.</p> @@ -426,16 +424,16 @@ nodes) and Job 4 is scheduled onto one of the remaining idle cpus on Linux04.</p <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 5 lsf sleep root PD 0:00 1 (Resources) - 2 lsf sleep root R 0:13 4 linux[01-04] - 3 lsf sleep root R 0:09 3 linux[01-03] - 4 lsf sleep root R 0:05 1 linux04 +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 5 lsf sleep root PD 0:00 1 (Resources) + 2 lsf sleep root R 0:13 4 linux[01-04] + 3 lsf sleep root R 0:09 3 linux[01-03] + 4 lsf sleep root R 0:05 1 linux04 # sinfo -lNe NODELIST NODES PARTITION STATE CPUS MEMORY TMP_DISK WEIGHT FEATURES REASON -linux[01-03] 3 lsf* allocated 2 2981 1 1 (null) none -linux04 1 lsf* allocated 4 3813 1 1 (null) none +linux[01-03] 3 lsf* allocated 2 2981 1 1 (null) none +linux04 1 lsf* allocated 4 3813 1 1 (null) none </pre> <p>Once Job 2 finishes, Job 5, which was pending, is allocated available resources and is then @@ -443,10 +441,10 @@ running as illustrated below:</p> <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 3 lsf sleep root R 1:58 3 linux[01-03] - 4 lsf sleep root R 1:54 1 linux04 - 5 lsf sleep root R 0:02 3 linux[01-03] +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 3 lsf sleep root R 1:58 3 linux[01-03] + 4 lsf sleep root R 1:54 1 linux04 + 5 lsf sleep root R 0:02 3 linux[01-03] # sinfo -lNe NODELIST NODES PARTITION STATE CPUS MEMORY TMP_DISK WEIGHT FEATURES REASON linux[01-03] 3 lsf* allocated 2 2981 1 1 (null) none @@ -457,8 +455,8 @@ linux04 1 lsf* idle 4 3813 1 1 (null) no <pre> # squeue - JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) - 5 lsf sleep root R 1:52 3 linux[01-03] +JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON) + 5 lsf sleep root R 1:52 3 linux[01-03] </pre> <p>Job 3 and Job 4 have finshed and Job 5 is still running on nodes linux[01-03].</p> @@ -480,6 +478,6 @@ one mpi process per node.</p> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 25 September 2006</p> +<p style="text-align:center;">Last modified 8 July 2008</p> <!--#include virtual="footer.txt"--> diff --git 
a/doc/html/cons_res_share.shtml b/doc/html/cons_res_share.shtml index 66715e41d32a2979600373c4077ec757818e4976..2221f4a2e586d6f672b40f385f4cdca1cf7f1dd6 100644 --- a/doc/html/cons_res_share.shtml +++ b/doc/html/cons_res_share.shtml @@ -173,7 +173,9 @@ to begin running "on top of" the existing jobs. This happens with the <H3>Memory Management</H3> <P> -The management of memory as a consumable resource remains unchanged: +The management of memory as a consumable resource remains unchanged and +can be used to prevent oversubscription of memory, which would result in +having memory pages swapped out and severely degraded performance. </P> <TABLE CELLPADDING=3 CELLSPACING=1 BORDER=1> <TR><TH>Selection Setting</TH> @@ -202,21 +204,28 @@ available memory to meet the job's memory requirement will not be allocated to the job.</TD> </TR> </TABLE> -<P>Note that the <CODE>srun --mem=<num></CODE> option is only used to -request nodes that have <num> amount of real memory. This option does not -compute memory that is currently available. -</P><P> -The <CODE>srun --job-mem=<num></CODE> option is used with the -<CODE>select/cons_res</CODE> plugin to request available memory from each node. -</P><P> -The <CODE>select/cons_res</CODE> plugin tracks memory usage by each job on each -node regardless of the number partitions a node may be assigned to. The primary -purpose of tracking memory as a consumable resource is to protect jobs from -having their memory pages swapped out because the memory has been overcommitted. -</P> +<P>Users can specify their job's memory requirements one of two ways. +<CODE>--mem=<num></CODE> can be used to specify the job's memory +requirement on a per allocated node basis. This option is probably best +suited for use with the <CODE>select/linear</CODE> plugin, which allocates +whole nodes to jobs. +<CODE>--mem-per-cpu=<num></CODE> can be used to specify the job's +memory requirement on a per allocated CPU basis. This is probably best +suited for use with the <CODE>select/cons_res</CODE> plugin which can +allocate individual CPUs to jobs.</P> + +<P>Default and maximum values for memory on a per node or per CPU basis can +be configured using the following options: <CODE>DefMemPerCPU</CODE>, +<CODE>DefMemPerNode</CODE>, <CODE>MaxMemPerCPU</CODE> and <CODE>MaxMemPerNode</CODE>. +Users can use the <CODE>--mem</CODE> or <CODE>--mem-per-cpu</CODE> option +at job submission time to specify their memory requirements. +Enforcement of a job's memory allocation is performed by the accounting +plugin, which periodically gathers data about running jobs.
Set +<CODE>JobAcctGather</CODE> and <CODE>JobAcctFrequency</CODE> to +values suitable for your system.</P> <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 27 May 2008</p> +<p style="text-align:center;">Last modified 8 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/footer.txt b/doc/html/footer.txt index 49b3c490e0a24b72b7a456525f653d689750aeb4..dbed09229de86685e99ebb3e7a88fa755db5407a 100644 --- a/doc/html/footer.txt +++ b/doc/html/footer.txt @@ -1,8 +1,8 @@ </div> <!-- closes "content" --> <div id="footer"> -<div id="left"> <span class="ucrlnum">LLNL-WEB-402631 |</span> <a href="https://www.llnl.gov/disclaimer.html" target="_blank" class="privacy">Privacy & Legal Notice</a></div> -<div id="right"><span class="ucrlnum">12 March 2008 </span></div> +<div id="left"> <span class="ucrlnum">LLNL-WEB-405518 |</span> <a href="https://www.llnl.gov/disclaimer.html" target="_blank" class="privacy">Privacy & Legal Notice</a></div> +<div id="right"><span class="ucrlnum">18 July 2008 </span></div> </div> <div id="footer2"> diff --git a/doc/html/gang_scheduling.shtml b/doc/html/gang_scheduling.shtml index 66c0b7cf690a86d02f163c293690400f21297874..e8d37467bb11b663d28df404c56201a00af4227c 100644 --- a/doc/html/gang_scheduling.shtml +++ b/doc/html/gang_scheduling.shtml @@ -8,29 +8,30 @@ to jobs. Beginning in SLURM version 1.3, gang scheduling is supported. Gang scheduling is when two or more jobs are allocated to the same resources and these jobs are alternately suspended to let all of the tasks of each -job have full access to the shared resources for a period of time. +job have full access to the shared resources for a period of time. </P> -<P> +<P> A resource manager that supports timeslicing can improve it's responsiveness and utilization by allowing more jobs to begin running sooner. Shorter-running jobs no longer have to wait in a queue behind longer-running jobs. Instead they can be run "in parallel" with the longer-running jobs, which will allow them to finish quicker. Throughput is also improved because overcommitting the resources provides opportunities for "local backfilling" to occur (see example -below). +below). </P> -<P> +<P> The SLURM 1.3.0 the <I>sched/gang</I> plugin provides timeslicing. When enabled, -it monitors each of the partitions in SLURM. If a new job has been allocated to -resources in a partition that have already been allocated to an existing job, -then the plugin will suspend the new job until the configured -<I>SchedulerTimeslice</I> interval has elapsed. Then it will suspend the -running job and let the new job make use of the resources for a -<I>SchedulerTimeslice</I> interval. This will continue until one of the -jobs terminates. +it monitors each of the partitions in SLURM. If a new job has been allocated to +resources in a partition that have already been allocated to an existing job, +then the plugin will suspend the new job until the configured +<I>SchedulerTimeslice</I> interval has elapsed. Then it will suspend the +running job and let the new job make use of the resources for a +<I>SchedulerTimeslice</I> interval. This will continue until one of the +jobs terminates. </P> <H2>Configuration</H2> + <P> There are several important configuration parameters relating to gang scheduling: @@ -46,15 +47,20 @@ allocated by the <I>select/cons_res</I> plugin. 
with jobs, the resource selection plugin should be configured to track the amount of memory used by each job to ensure that memory page swapping does not occur. When <I>select/linear</I> is chosen, we recommend setting -<I>SelectTypeParameter=CR_Memory</I>. When <I>select/cons_res</I> is -chosen, we recommend including Memory as a resource (ex. +<I>SelectTypeParameter=CR_Memory</I>. When <I>select/cons_res</I> is +chosen, we recommend including Memory as a resource (ex. <I>SelectTypeParameter=CR_Core_Memory</I>). </LI> <LI> -<B>DefMemPerTask</B>: Since job requests may not explicitly specify -a memory requirement, we also recommend configuring <I>DefMemPerTask</I> -(default memory per task). It may also be desirable to configure -<I>MaxMemPerTask</I> (maximum memory per task) in <I>slurm.conf</I>. +<B>DefMemPerCPU</B>: Since job requests may not explicitly specify +a memory requirement, we also recommend configuring +<I>DefMemPerCPU</I> (default memory per allocated CPU) or +<I>DefMemPerNode</I> (default memory per allocated node). +It may also be desirable to configure +<I>MaxMemPerCPU</I> (maximum memory per allocated CPU) or +<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>. +Users can use the <I>--mem</I> or <I>--mem-per-cpu</I> option +at job submission time to specify their memory requirements. </LI> <LI> <B>JobAcctGatherType and JobAcctGatherFrequency</B>: @@ -63,9 +69,9 @@ using the <I>JobAcctGatherType</I> and <I>JobAcctGatherFrequency</I> parameters. If accounting is enabled and a job exceeds its configured memory limits, it will be canceled in order to prevent it from adversely effecting other jobs sharing the same resources. -</LI> +</LI> <LI> -<B>SchedulerType</B>: Configure the <I>sched/gang</I> plugin by setting +<B>SchedulerType</B>: Configure the <I>sched/gang</I> plugin by setting <I>SchedulerType=sched/gang</I> in <I>slurm.conf</I>. </LI> <LI> @@ -88,7 +94,7 @@ allocated to a common resource, set <I>Shared=FORCE:6</I>. In order to enable gang scheduling after making the configuration changes described above, restart SLURM if it is already running. Any change to the plugin settings in SLURM requires a full restart of the daemons. If you -just change the partition <I>Shared</I> setting, this can be updated with +just change the partition <I>Shared</I> setting, this can be updated with <I>scontrol reconfig</I>. </P> <P> @@ -96,377 +102,412 @@ For an advanced topic discussion on the potential use of swap space, see "Making use of swap space" in the "Future Work" section below. </P> -<H2>Timeslicer Design and Operation</H2> +<H2>Timeslicer Design and Operation</H2> <P> -When enabled, the <I>sched/gang</I> plugin keeps track of the resources -allocated to all jobs. For each partition an "active bitmap" is maintained that -tracks all concurrently running jobs in the SLURM cluster. Each time a new -job is allocated to resources in a partition, the <I>sched/gang</I> plugin -compares these newly allocated resources with the resources already maintained -in the "active bitmap". If these two sets of resources are disjoint then the new -job is added to the "active bitmap". If these two sets of resources overlap then -the new job is suspended. All jobs are tracked in a per-partition job queue +When enabled, the <I>sched/gang</I> plugin keeps track of the resources +allocated to all jobs. For each partition an "active bitmap" is maintained that +tracks all concurrently running jobs in the SLURM cluster. 
Each time a new +job is allocated to resources in a partition, the <I>sched/gang</I> plugin +compares these newly allocated resources with the resources already maintained +in the "active bitmap". If these two sets of resources are disjoint then the new +job is added to the "active bitmap". If these two sets of resources overlap then +the new job is suspended. All jobs are tracked in a per-partition job queue within the <I>sched/gang</I> plugin. </P> <P> -A separate <I>timeslicer thread</I> is spawned by the <I>sched/gang</I> plugin -on startup. This thread sleeps for the configured <I>SchedulerTimeSlice</I> -interval. When it wakes up, it checks each partition for suspended jobs. If -suspended jobs are found then the <I>timeslicer thread</I> moves all running -jobs to the end of the job queue. It then reconstructs the "active bitmap" for -this partition beginning with the suspended job that has waited the longest to -run (this will be the first suspended job in the run queue). Each following job -is then compared with the new "active bitmap", and if the job can be run -concurrently with the other "active" jobs then the job is added. Once this is -complete then the <I>timeslicer thread</I> suspends any currently running jobs -that are no longer part of the "active bitmap", and resumes jobs that are new to +A separate <I>timeslicer thread</I> is spawned by the <I>sched/gang</I> plugin +on startup. This thread sleeps for the configured <I>SchedulerTimeSlice</I> +interval. When it wakes up, it checks each partition for suspended jobs. If +suspended jobs are found then the <I>timeslicer thread</I> moves all running +jobs to the end of the job queue. It then reconstructs the "active bitmap" for +this partition beginning with the suspended job that has waited the longest to +run (this will be the first suspended job in the run queue). Each following job +is then compared with the new "active bitmap", and if the job can be run +concurrently with the other "active" jobs then the job is added. Once this is +complete then the <I>timeslicer thread</I> suspends any currently running jobs +that are no longer part of the "active bitmap", and resumes jobs that are new to the "active bitmap". </P> <P> -This <I>timeslicer thread</I> algorithm for rotating jobs is designed to prevent -jobs from starving (remaining in the suspended state indefinitly) and to be as -fair as possible in the distribution of runtime while still keeping all of the +This <I>timeslicer thread</I> algorithm for rotating jobs is designed to prevent +jobs from starving (remaining in the suspended state indefinitly) and to be as +fair as possible in the distribution of runtime while still keeping all of the resources as busy as possible. </P> <P> -The <I>sched/gang</I> plugin suspends jobs via the same internal functions that -support <I>scontrol suspend</I> and <I>scontrol resume</I>. A good way to -observe the operation of the timeslicer is by running <I>watch squeue</I> in a +The <I>sched/gang</I> plugin suspends jobs via the same internal functions that +support <I>scontrol suspend</I> and <I>scontrol resume</I>. A good way to +observe the operation of the timeslicer is by running <I>watch squeue</I> in a terminal window. </P> -<H2>A Simple Example</H2> +<H2>A Simple Example</H2> <P> -The following example is configured with <I>select/linear</I>, -<I>sched/gang</I>, and <I>Shared=FORCE</I>. This example takes place on a small +The following example is configured with <I>select/linear</I>, +<I>sched/gang</I>, and <I>Shared=FORCE</I>. 
This example takes place on a small cluster of 5 nodes: -</P> -<PRE> -[user@n16 load]$ <B>sinfo</B> -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -active* up infinite 5 idle n[12-16] +</P> +<PRE> +[user@n16 load]$ <B>sinfo</B> +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +active* up infinite 5 idle n[12-16] </PRE> <P> Here are the Scheduler settings (the last two settings are the relevant ones): </P> -<PRE> -[user@n16 load]$ <B>scontrol show config | grep Sched</B> -FastSchedule = 1 -SchedulerPort = 7321 -SchedulerRootFilter = 1 -SchedulerTimeSlice = 30 -SchedulerType = sched/gang -[user@n16 load]$ -</PRE> -<P> -The <I>myload</I> script launches a simple load-generating app that runs +<PRE> +[user@n16 load]$ <B>scontrol show config | grep Sched</B> +FastSchedule = 1 +SchedulerPort = 7321 +SchedulerRootFilter = 1 +SchedulerTimeSlice = 30 +SchedulerType = sched/gang +</PRE> +<P> +The <I>myload</I> script launches a simple load-generating app that runs for the given number of seconds. Submit <I>myload</I> to run on all nodes: </P> -<PRE> -[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> -sbatch: Submitted batch job 3 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 3 active myload user 0:05 5 n[12-16] +<PRE> +[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> +sbatch: Submitted batch job 3 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 3 active myload user 0:05 5 n[12-16] </PRE> <P> Submit it again and watch the <I>sched/gang</I> plugin suspend it: </P> -<PRE> -[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> -sbatch: Submitted batch job 4 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 3 active myload user R 0:13 5 n[12-16] - 4 active myload user S 0:00 5 n[12-16] +<PRE> +[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> +sbatch: Submitted batch job 4 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 3 active myload user R 0:13 5 n[12-16] + 4 active myload user S 0:00 5 n[12-16] </PRE> <P> -After 30 seconds the <I>sched/gang</I> plugin swaps jobs, and now job 4 is the +After 30 seconds the <I>sched/gang</I> plugin swaps jobs, and now job 4 is the active one: </P> -<PRE> -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 4 active myload user R 0:08 5 n[12-16] - 3 active myload user S 0:41 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 4 active myload user R 0:21 5 n[12-16] - 3 active myload user S 0:41 5 n[12-16] +<PRE> +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 4 active myload user R 0:08 5 n[12-16] + 3 active myload user S 0:41 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 4 active myload user R 0:21 5 n[12-16] + 3 active myload user S 0:41 5 n[12-16] +</PRE> +<P> +After another 30 seconds the <I>sched/gang</I> plugin sets job 3 running again: +</P> +<PRE> +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 3 active myload user R 0:50 5 n[12-16] + 4 active myload user S 0:30 5 n[12-16] +</PRE> + +<P> +<B>A possible side effect of timeslicing</B>: Note that jobs that are +immediately suspended may cause their srun commands to produce the following +output: +</P> +<PRE> +[user@n16 load]$ <B>cat slurm-4.out</B> +srun: Job step creation temporarily disabled, retrying +srun: Job step creation still disabled, retrying +srun: Job step creation still 
disabled, retrying +srun: Job step creation still disabled, retrying +srun: Job step created +</PRE> +<P> +This occurs because <I>srun</I> is attempting to launch a jobstep in an +allocation that has been suspended. The <I>srun</I> process will continue in a +retry loop to launch the jobstep until the allocation has been resumed and the +jobstep can be launched. +</P> +<P> +When the <I>sched/gang</I> plugin is enabled, this type of output in the user +jobs should be considered benign. +</P> + +<H2>More examples</H2> + +<P> +The following example shows how the timeslicer algorithm keeps the resources +busy. Job 10 runs continually, while jobs 9 and 11 are timesliced: +</P> + +<PRE> +[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> +sbatch: Submitted batch job 9 + +[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> +sbatch: Submitted batch job 10 + +[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> +sbatch: Submitted batch job 11 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 9 active myload user R 0:11 3 n[12-14] + 10 active myload user R 0:08 2 n[15-16] + 11 active myload user S 0:00 3 n[12-14] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 10 active myload user R 0:50 2 n[15-16] + 11 active myload user R 0:12 3 n[12-14] + 9 active myload user S 0:41 3 n[12-14] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 10 active myload user R 1:04 2 n[15-16] + 11 active myload user R 0:26 3 n[12-14] + 9 active myload user S 0:41 3 n[12-14] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 9 active myload user R 0:46 3 n[12-14] + 10 active myload user R 1:13 2 n[15-16] + 11 active myload user S 0:30 3 n[12-14] </PRE> -<P> After another 30 seconds the <I>sched/gang</I> plugin sets job 3 running again: -</P> -<PRE> -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 3 active myload user R 0:50 5 n[12-16] - 4 active myload user S 0:30 5 n[12-16] -</PRE> -<P> -<B>A possible side effect of timeslicing</B>: Note that jobs that are -immediately suspended may cause their srun commands to produce the following -output: -</P> -<PRE> -[user@n16 load]$ <B>cat slurm-4.out</B> -srun: Job step creation temporarily disabled, retrying -srun: Job step creation still disabled, retrying -srun: Job step creation still disabled, retrying -srun: Job step creation still disabled, retrying -srun: Job step created +</P> +<P> +The next example displays "local backfilling": +</P> +<PRE> +[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> +sbatch: Submitted batch job 12 + +[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> +sbatch: Submitted batch job 13 + +[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> +sbatch: Submitted batch job 14 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 12 active myload user R 0:14 3 n[12-14] + 14 active myload user R 0:06 2 n[15-16] + 13 active myload user S 0:00 5 n[12-16] </PRE> -<P> -This occurs because <I>srun</I> is attempting to launch a jobstep in an -allocation that has been suspended. The <I>srun</I> process will continue in a -retry loop to launch the jobstep until the allocation has been resumed and the -jobstep can be launched. -</P> -<P> -When the <I>sched/gang</I> plugin is enabled, this type of output in the user -jobs should be considered benign. -</P> - -<H2>More examples</H2> -<P> -The following example shows how the timeslicer algorithm keeps the resources -busy. 
Job 10 runs continually, while jobs 9 and 11 are timesliced: -</P> -<PRE> -[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> -sbatch: Submitted batch job 9 -[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> -sbatch: Submitted batch job 10 -[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> -sbatch: Submitted batch job 11 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 9 active myload user R 0:11 3 n[12-14] - 10 active myload user R 0:08 2 n[15-16] - 11 active myload user S 0:00 3 n[12-14] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 10 active myload user R 0:50 2 n[15-16] - 11 active myload user R 0:12 3 n[12-14] - 9 active myload user S 0:41 3 n[12-14] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 10 active myload user R 1:04 2 n[15-16] - 11 active myload user R 0:26 3 n[12-14] - 9 active myload user S 0:41 3 n[12-14] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 9 active myload user R 0:46 3 n[12-14] - 10 active myload user R 1:13 2 n[15-16] - 11 active myload user S 0:30 3 n[12-14] -[user@n16 load]$ -</PRE> -</P> -<P> -The next example displays "local backfilling": -</P> -<PRE> -[user@n16 load]$ <B>sbatch -N3 ./myload 300</B> -sbatch: Submitted batch job 12 -[user@n16 load]$ <B>sbatch -N5 ./myload 300</B> -sbatch: Submitted batch job 13 -[user@n16 load]$ <B>sbatch -N2 ./myload 300</B> -sbatch: Submitted batch job 14 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 12 active myload user R 0:14 3 n[12-14] - 14 active myload user R 0:06 2 n[15-16] - 13 active myload user S 0:00 5 n[12-16] -[user@n16 load]$ -</PRE> -<P> -Without timeslicing and without the backfill scheduler enabled, job 14 has to -wait for job 13 to finish. -</P><P> -This is called "local" backfilling because the backfilling only occurs with jobs -close enough in the queue to get allocated by the scheduler as part of -oversubscribing the resources. Recall that the number of jobs that can -overcommit a resource is controlled by the <I>Shared=FORCE:max_share</I> value, -so this value effectively controls the scope of "local backfilling". -</P><P> -Normal backfill algorithms check <U>all</U> jobs in the wait queue. -</P> - -<H2>Consumable Resource Examples</H2> -<P> -The following two examples illustrate the primary difference between -<I>CR_CPU</I> and <I>CR_Core</I> when consumable resource selection is enabled -(<I>select/cons_res</I>). -</P> -<P> -When <I>CR_CPU</I> (or <I>CR_CPU_Memory</I>) is configured then the selector -treats the CPUs as simple, <I>interchangeable</I> computing resources. However -when <I>CR_Core</I> (or <I>CR_Core_Memory</I>) is enabled the selector treats -the CPUs as individual resources that are <U>specifically</U> allocated to jobs. -This subtle difference is highlighted when timeslicing is enabled. -</P> -<P> -In both examples 6 jobs are submitted. Each job requests 2 CPUs per node, and -all of the nodes contain two quad-core processors. The timeslicer will initially -let the first 4 jobs run and suspend the last 2 jobs. The manner in which these -jobs are timesliced depends upon the configured <I>SelectTypeParameter</I>. -</P> -<P> -In the first example <I>CR_Core_Memory</I> is configured. Note that jobs 46 and -47 don't <U>ever</U> get suspended. This is because they are not sharing their -cores with any other job. Jobs 48 and 49 were allocated to the same cores as -jobs 45 and 46. 
The timeslicer recognizes this and timeslices only those jobs: -</P> -<PRE> -[user@n16 load]$ <B>sinfo</B> -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -active* up infinite 5 idle n[12-16] -[user@n16 load]$ <B>scontrol show config | grep Select</B> -SelectType = select/cons_res -SelectTypeParameters = CR_CORE_MEMORY -[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> -NODELIST NODES CPUS S:C:T -n[12-16] 5 8 2:4:1 -[user@n16 load]$ -[user@n16 load]$ -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 44 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 45 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 46 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 47 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 48 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 49 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 44 active myload user R 0:09 5 n[12-16] - 45 active myload user R 0:08 5 n[12-16] - 46 active myload user R 0:08 5 n[12-16] - 47 active myload user R 0:07 5 n[12-16] - 48 active myload user S 0:00 5 n[12-16] - 49 active myload user S 0:00 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 46 active myload user R 0:49 5 n[12-16] - 47 active myload user R 0:48 5 n[12-16] - 48 active myload user R 0:06 5 n[12-16] - 49 active myload user R 0:06 5 n[12-16] - 44 active myload user S 0:44 5 n[12-16] - 45 active myload user S 0:43 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 44 active myload user R 1:23 5 n[12-16] - 45 active myload user R 1:22 5 n[12-16] - 46 active myload user R 2:22 5 n[12-16] - 47 active myload user R 2:21 5 n[12-16] - 48 active myload user S 1:00 5 n[12-16] - 49 active myload user S 1:00 5 n[12-16] -[user@n16 load]$ -</PRE> -<P> -Note the runtime of all 6 jobs in the output of the last <I>squeue</I> command. -Jobs 46 and 47 have been running continuously, while jobs 45 and 46 are -splitting their runtime with jobs 48 and 49. -</P><P> -The next example has <I>CR_CPU_Memory</I> configured and the same 6 jobs are -submitted. 
Here the selector and the timeslicer treat the CPUs as countable -resources which results in all 6 jobs sharing time on the CPUs: -</P> -<PRE> -[user@n16 load]$ <B>sinfo</B> -PARTITION AVAIL TIMELIMIT NODES STATE NODELIST -active* up infinite 5 idle n[12-16] -[user@n16 load]$ <B>scontrol show config | grep Select</B> -SelectType = select/cons_res -SelectTypeParameters = CR_CPU_MEMORY -[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> -NODELIST NODES CPUS S:C:T -n[12-16] 5 8 2:4:1 -[user@n16 load]$ -[user@n16 load]$ -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 51 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 52 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 53 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 54 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 55 -[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> -sbatch: Submitted batch job 56 -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 51 active myload user R 0:11 5 n[12-16] - 52 active myload user R 0:11 5 n[12-16] - 53 active myload user R 0:10 5 n[12-16] - 54 active myload user R 0:09 5 n[12-16] - 55 active myload user S 0:00 5 n[12-16] - 56 active myload user S 0:00 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 51 active myload user R 1:09 5 n[12-16] - 52 active myload user R 1:09 5 n[12-16] - 55 active myload user R 0:23 5 n[12-16] - 56 active myload user R 0:23 5 n[12-16] - 53 active myload user S 0:45 5 n[12-16] - 54 active myload user S 0:44 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 53 active myload user R 0:55 5 n[12-16] - 54 active myload user R 0:54 5 n[12-16] - 55 active myload user R 0:40 5 n[12-16] - 56 active myload user R 0:40 5 n[12-16] - 51 active myload user S 1:16 5 n[12-16] - 52 active myload user S 1:16 5 n[12-16] -[user@n16 load]$ <B>squeue</B> -JOBID PARTITION NAME USER ST TIME NODES NODELIST - 51 active myload user R 3:18 5 n[12-16] - 52 active myload user R 3:18 5 n[12-16] - 53 active myload user R 3:17 5 n[12-16] - 54 active myload user R 3:16 5 n[12-16] - 55 active myload user S 3:00 5 n[12-16] - 56 active myload user S 3:00 5 n[12-16] -[user@n16 load]$ -</PRE> -<P> -Note that the runtime of all 6 jobs is roughly equal. Jobs 51-54 ran first so -they're slightly ahead, but so far all jobs have run for at least 3 minutes. -</P><P> -At the core level this means that SLURM relies on the linux kernel to move jobs -around on the cores to maximize performance. This is different than when -<I>CR_Core_Memory</I> was configured and the jobs would effectively remain -"pinned" to their specific cores for the duration of the job. Note that -<I>CR_Core_Memory</I> supports CPU binding, while <I>CR_CPU_Memory</I> does not. +<P> +Without timeslicing and without the backfill scheduler enabled, job 14 has to +wait for job 13 to finish. +</P> +<P> +This is called "local" backfilling because the backfilling only occurs with jobs +close enough in the queue to get allocated by the scheduler as part of +oversubscribing the resources. Recall that the number of jobs that can +overcommit a resource is controlled by the <I>Shared=FORCE:max_share</I> value, +so this value effectively controls the scope of "local backfilling". +</P> +<P> +Normal backfill algorithms check <U>all</U> jobs in the wait queue. 
+</P> + +<H2>Consumable Resource Examples</H2> + +<P> +The following two examples illustrate the primary difference between +<I>CR_CPU</I> and <I>CR_Core</I> when consumable resource selection is enabled +(<I>select/cons_res</I>). +</P> +<P> +When <I>CR_CPU</I> (or <I>CR_CPU_Memory</I>) is configured then the selector +treats the CPUs as simple, <I>interchangeable</I> computing resources. However +when <I>CR_Core</I> (or <I>CR_Core_Memory</I>) is enabled the selector treats +the CPUs as individual resources that are <U>specifically</U> allocated to jobs. +This subtle difference is highlighted when timeslicing is enabled. +</P> +<P> +In both examples 6 jobs are submitted. Each job requests 2 CPUs per node, and +all of the nodes contain two quad-core processors. The timeslicer will initially +let the first 4 jobs run and suspend the last 2 jobs. The manner in which these +jobs are timesliced depends upon the configured <I>SelectTypeParameter</I>. +</P> +<P> +In the first example <I>CR_Core_Memory</I> is configured. Note that jobs 46 and +47 don't <U>ever</U> get suspended. This is because they are not sharing their +cores with any other job. Jobs 48 and 49 were allocated to the same cores as +jobs 45 and 46. The timeslicer recognizes this and timeslices only those jobs: +</P> +<PRE> +[user@n16 load]$ <B>sinfo</B> +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +active* up infinite 5 idle n[12-16] + +[user@n16 load]$ <B>scontrol show config | grep Select</B> +SelectType = select/cons_res +SelectTypeParameters = CR_CORE_MEMORY + +[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> +NODELIST NODES CPUS S:C:T +n[12-16] 5 8 2:4:1 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 44 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 45 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 46 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 47 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 48 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 49 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 44 active myload user R 0:09 5 n[12-16] + 45 active myload user R 0:08 5 n[12-16] + 46 active myload user R 0:08 5 n[12-16] + 47 active myload user R 0:07 5 n[12-16] + 48 active myload user S 0:00 5 n[12-16] + 49 active myload user S 0:00 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 46 active myload user R 0:49 5 n[12-16] + 47 active myload user R 0:48 5 n[12-16] + 48 active myload user R 0:06 5 n[12-16] + 49 active myload user R 0:06 5 n[12-16] + 44 active myload user S 0:44 5 n[12-16] + 45 active myload user S 0:43 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 44 active myload user R 1:23 5 n[12-16] + 45 active myload user R 1:22 5 n[12-16] + 46 active myload user R 2:22 5 n[12-16] + 47 active myload user R 2:21 5 n[12-16] + 48 active myload user S 1:00 5 n[12-16] + 49 active myload user S 1:00 5 n[12-16] +</PRE> +<P> +Note the runtime of all 6 jobs in the output of the last <I>squeue</I> command. +Jobs 46 and 47 have been running continuously, while jobs 45 and 46 are +splitting their runtime with jobs 48 and 49. +</P> +<P> +The next example has <I>CR_CPU_Memory</I> configured and the same 6 jobs are +submitted. 
Here the selector and the timeslicer treat the CPUs as countable +resources which results in all 6 jobs sharing time on the CPUs: +</P> +<PRE> +[user@n16 load]$ <B>sinfo</B> +PARTITION AVAIL TIMELIMIT NODES STATE NODELIST +active* up infinite 5 idle n[12-16] + +[user@n16 load]$ <B>scontrol show config | grep Select</B> +SelectType = select/cons_res +SelectTypeParameters = CR_CPU_MEMORY + +[user@n16 load]$ <B>sinfo -o "%20N %5D %5c %5z"</B> +NODELIST NODES CPUS S:C:T +n[12-16] 5 8 2:4:1 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 51 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 52 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 53 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 54 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 55 + +[user@n16 load]$ <B>sbatch -n10 -N5 ./myload 300</B> +sbatch: Submitted batch job 56 + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 51 active myload user R 0:11 5 n[12-16] + 52 active myload user R 0:11 5 n[12-16] + 53 active myload user R 0:10 5 n[12-16] + 54 active myload user R 0:09 5 n[12-16] + 55 active myload user S 0:00 5 n[12-16] + 56 active myload user S 0:00 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 51 active myload user R 1:09 5 n[12-16] + 52 active myload user R 1:09 5 n[12-16] + 55 active myload user R 0:23 5 n[12-16] + 56 active myload user R 0:23 5 n[12-16] + 53 active myload user S 0:45 5 n[12-16] + 54 active myload user S 0:44 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 53 active myload user R 0:55 5 n[12-16] + 54 active myload user R 0:54 5 n[12-16] + 55 active myload user R 0:40 5 n[12-16] + 56 active myload user R 0:40 5 n[12-16] + 51 active myload user S 1:16 5 n[12-16] + 52 active myload user S 1:16 5 n[12-16] + +[user@n16 load]$ <B>squeue</B> +JOBID PARTITION NAME USER ST TIME NODES NODELIST + 51 active myload user R 3:18 5 n[12-16] + 52 active myload user R 3:18 5 n[12-16] + 53 active myload user R 3:17 5 n[12-16] + 54 active myload user R 3:16 5 n[12-16] + 55 active myload user S 3:00 5 n[12-16] + 56 active myload user S 3:00 5 n[12-16] +</PRE> +<P> +Note that the runtime of all 6 jobs is roughly equal. Jobs 51-54 ran first so +they're slightly ahead, but so far all jobs have run for at least 3 minutes. +</P> +<P> +At the core level this means that SLURM relies on the linux kernel to move jobs +around on the cores to maximize performance. This is different than when +<I>CR_Core_Memory</I> was configured and the jobs would effectively remain +"pinned" to their specific cores for the duration of the job. Note that +<I>CR_Core_Memory</I> supports CPU binding, while <I>CR_CPU_Memory</I> does not. </P> <H2>Future Work</H2> - -<P> -Priority scheduling and preemptive scheduling are other forms of gang -scheduling that are currently under development for SLURM. -</P> -<P> -<B>Making use of swap space</B>: (note that this topic is not currently -scheduled for development, unless someone would like to pursue this) It should -be noted that timeslicing does provide an interesting mechanism for high -performance jobs to make use of swap space. The optimal scenario is one in which -suspended jobs are "swapped out" and active jobs are "swapped in". 
The swapping
-activity would only occur once every <I>SchedulerTimeslice</I> interval.
-</P>
-<P>
-However, SLURM should first be modified to include support for scheduling jobs
-into swap space and to provide controls to prevent overcommitting swap space.
-For now this idea could be experimented with by disabling memory support in the
-selector and submitting appropriately sized jobs.
-</P>
-
-<p style="text-align:center;">Last modified 17 March 2008</p>
+
+<P>
+Priority scheduling and preemptive scheduling are other forms of gang
+scheduling that are currently under development for SLURM.
+</P>
+<P>
+<B>Making use of swap space</B>: (note that this topic is not currently
+scheduled for development, unless someone would like to pursue this) It should
+be noted that timeslicing does provide an interesting mechanism for high
+performance jobs to make use of swap space. The optimal scenario is one in which
+suspended jobs are "swapped out" and active jobs are "swapped in". The swapping
+activity would only occur once every <I>SchedulerTimeslice</I> interval.
+</P>
+<P>
+However, SLURM should first be modified to include support for scheduling jobs
+into swap space and to provide controls to prevent overcommitting swap space.
+For now this idea could be experimented with by disabling memory support in the
+selector and submitting appropriately sized jobs.
+</P>
+
+<p style="text-align:center;">Last modified 7 July 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/header.txt b/doc/html/header.txt
index 2f84c37a8589cdd4bb81c0de6460d78f80c7c596..1adedfd3ddcb58b190f617877d53730610ec8a35 100644
--- a/doc/html/header.txt
+++ b/doc/html/header.txt
@@ -8,8 +8,8 @@
 <meta http-equiv="Pragma" content="no-cache">
 <meta http-equiv="keywords" content="Simple Linux Utility for Resource Management, SLURM, resource management, Linux clusters, high-performance computing, Livermore Computing">
-<meta name="LLNLRandR" content="LLNL-WEB-402631">
-<meta name="LLNLRandRdate" content="18 December 2006">
+<meta name="LLNLRandR" content="LLNL-WEB-405518">
+<meta name="LLNLRandRdate" content="18 July 2008">
 <meta name="distribution" content="global">
 <meta name="description" content="Simple Linux Utility for Resource Management">
 <meta name="copyright"
diff --git a/doc/html/moab.shtml b/doc/html/moab.shtml
index 247b3f7346c81e2ba873f1dc934d8ed9dd6506b5..3ee5c7b151bfd329ababf5671dbac4d05f95c6d9 100644
--- a/doc/html/moab.shtml
+++ b/doc/html/moab.shtml
@@ -198,18 +198,30 @@ that the user's environment on the execution host be loaded.
 Moab relies upon SLURM to perform this action, using the
 <i>--get-user-env</i> option for the salloc, sbatch and srun commands.
 The SLURM command then executes as user root a command of this sort
-as user root:
+as user root:</p>
 <pre>
 /bin/su - <user> -c \
   "/bin/echo BEGIN; /bin/env; /bin/echo FINI"
 </pre>
-While this command is executing within salloc, sbatch or srun,
-the Moab daemon is completely non-responsive.
+<p> For typical batch jobs, the job transfer from Moab to
+SLURM is performed using <i>sbatch</i> and occurs instantaneously.
+The environment is loaded by a SLURM daemon (slurmd) when the
+batch job begins execution.
+For interactive jobs (<i>msub -I ...</i>), the job transfer
+from Moab to SLURM cannot be completed until the environment
+variables are loaded, during which time the Moab daemon is
+completely non-responsive.
To insure that Moab remains operational, SLURM will abort the above -command within a few seconds and look for a cache file with the -user's environment and use that if found. +command within a configurable period of time and look for a cache +file with the user's environment and use that if found. Otherwise an error is reported to Moab. -We have provided a simple program that can be used to build +The time permitted for loading the current environment +before searching for a cache file is configurable using +the <i>GetEnvTimeout</i> parameter in SLURM's configuration +file, slurm.conf. A value of zero results in immediately +using the cache file. The default value is 2 seconds.</p> + +<p>We have provided a simple program that can be used to build cache files for users. The program can be found in the SLURM distribution at <i>contribs/env_cache_builder.c</i>. This program can support a longer timeout than Moab, but @@ -247,6 +259,6 @@ Write the output to a file with the same name as the user in the <p class="footer"><a href="#top">top</a></p> -<p style="text-align:center;">Last modified 23 April 2008</p> +<p style="text-align:center;">Last modified 10 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/preempt.shtml b/doc/html/preempt.shtml index d58acf0032483a69c2fc9033c28b17dcb7546a58..2f9bd34df495a63c656d520d171b288a56bc5857 100644 --- a/doc/html/preempt.shtml +++ b/doc/html/preempt.shtml @@ -44,10 +44,15 @@ chosen, we recommend setting <I>SelectTypeParameter=CR_Memory</I>. When (ex. <I>SelectTypeParameter=CR_Core_Memory</I>). </LI> <LI> -<B>DefMemPerTask</B>: Since job requests may not explicitly specify -a memory requirement, we also recommend configuring <I>DefMemPerTask</I> -(default memory per task). It may also be desirable to configure -<I>MaxMemPerTask</I> (maximum memory per task) in <I>slurm.conf</I>. +<B>DefMemPerCPU</B>: Since job requests may not explicitly specify +a memory requirement, we also recommend configuring +<I>DefMemPerCPU</I> (default memory per allocated CPU) or +<I>DefMemPerNode</I> (default memory per allocated node). +It may also be desirable to configure +<I>MaxMemPerCPU</I> (maximum memory per allocated CPU) or +<I>MaxMemPerNode</I> (maximum memory per allocated node) in <I>slurm.conf</I>. +Users can use the <I>--mem</I> or <I>--mem-per-cpu</I> option +at job submission time to specify their memory requirements. 
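+For example, a <I>slurm.conf</I> fragment along the following lines (the
+values shown are only illustrative) would establish both a default and an
+upper limit for the memory allocated per CPU:
+<PRE>
+SelectType=select/cons_res
+SelectTypeParameters=CR_Core_Memory
+DefMemPerCPU=512
+MaxMemPerCPU=1024
+</PRE>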
</LI> <LI> <B>JobAcctGatherType and JobAcctGatherFrequency</B>: @@ -158,7 +163,6 @@ Here are the Partition settings: [user@n16 ~]$ <B>grep PartitionName /shared/slurm/slurm.conf</B> PartitionName=active Priority=1 Default=YES Shared=FORCE:1 Nodes=n[12-16] PartitionName=hipri Priority=2 Shared=FORCE:1 Nodes=n[12-16] -[user@n16 ~]$ </PRE> <P> The <I>runit.pl</I> script launches a simple load-generating app that runs @@ -183,7 +187,6 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST 487 active runit.pl user R 0:05 1 n14 488 active runit.pl user R 0:05 1 n15 489 active runit.pl user R 0:04 1 n16 -[user@n16 ~]$ </PRE> <P> Now submit a short-running 3-node job to the <I>hipri</I> partition: @@ -199,7 +202,6 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST 486 active runit.pl user S 0:27 1 n13 487 active runit.pl user S 0:26 1 n14 490 hipri runit.pl user R 0:03 3 n[12-14] -[user@n16 ~]$ </PRE> <P> Job 490 in the <I>hipri</I> partition preempted jobs 485, 486, and 487 from @@ -218,7 +220,6 @@ JOBID PARTITION NAME USER ST TIME NODES NODELIST 487 active runit.pl user R 0:29 1 n14 488 active runit.pl user R 0:59 1 n15 489 active runit.pl user R 0:58 1 n16 -[user@n16 ~]$ </PRE> @@ -242,6 +243,6 @@ again. This will be investigated at some point in the future. Requeuing a preempted job may make the most sense with <I>Shared=NO</I> partitions. </P> -<p style="text-align:center;">Last modified 11 April 2008</p> +<p style="text-align:center;">Last modified 7 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/html/quickstart.shtml b/doc/html/quickstart.shtml index 7eac62a42a4221c6a27f2233c9e0699290f39023..af47c69404cd1dcde3efc18806716c4f63d2bb82 100644 --- a/doc/html/quickstart.shtml +++ b/doc/html/quickstart.shtml @@ -305,22 +305,31 @@ The <span class="commandline">salloc</span> command is would be used to create a resource allocation and typically start a shell within that allocation. One or more job steps would typically be executed within that allocation -using the srun command to launch the tasks. -Finally the shell created by salloc would be terminated using the -<i>exit</i> command. -In this example we will also use the <span class="commandline">sbcast</span> -command to transfer the executable program to local storage, /tmp/joe.a.out, -on the allocated nodes (1024 nodes in this example). +using the <span class="commandline">srun</span> command to launch the tasks +(depending upon the type of MPI being used, the launch mechanism may +differ, see <a href="#mpi">MPI</a> details below). +Finally the shell created by <span class="commandline">salloc</span> would +be terminated using the <i>exit</i> command. +SLURM does not automatically migrate executable or data files +to the nodes allocated to a job. +Either the files must exists on local disk or in some global file system +(e.g. NFS or Lustre). +We provide the tool <span class="commandline">sbcast</span> to transfer +files to local storage on allocated nodes using SLURM's hierarchical +communications. +In this example we use <span class="commandline">sbcast</span> to transfer +the executable program <i>a.out</i> to <i>/tmp/joe.a.out</i> on local storage +of the allocated nodes. 
After executing the program, we delete it from local storage</p> <pre> tux0: salloc -N1024 bash $ sbcast a.out /tmp/joe.a.out Granted job allocation 471 $ srun /tmp/joe.a.out -Result is 471 +Result is 3.14159 $ srun rm /tmp/joe.a.out $ exit -salloc: Relinquishing job allocation 1234 +salloc: Relinquishing job allocation 471 </pre> <p>In this example, we submit a batch job, get its status, and cancel it. </p> @@ -568,6 +577,6 @@ sbatch: Submitted batch job 1234 tasks. These tasks are not managed by SLURM since they are launched outside of its control.</p> -<p style="text-align:center;">Last modified 2 June 2008</p> +<p style="text-align:center;">Last modified 16 July 2008</p> <!--#include virtual="footer.txt"--> diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am index e8d06ea88aba3a08ac10a5000695c3792cecd588..ecc4cfe72ccffc7575759e21d7a69dbef1ae7288 100644 --- a/doc/man/Makefile.am +++ b/doc/man/Makefile.am @@ -60,6 +60,7 @@ man3_MANS = man3/slurm_hostlist_create.3 \ man3/slurm_kill_job.3 \ man3/slurm_kill_job_step.3 \ man3/slurm_load_ctl_conf.3 \ + man3/slurm_load_job.3 \ man3/slurm_load_jobs.3 \ man3/slurm_load_node.3 \ man3/slurm_load_partitions.3 \ diff --git a/doc/man/Makefile.in b/doc/man/Makefile.in index 109e32167ad8f1da5a58855fd74ed050377cd98a..7aaf763ccfea94d2593a7413d9e529e73c3384c5 100644 --- a/doc/man/Makefile.in +++ b/doc/man/Makefile.in @@ -301,6 +301,7 @@ man3_MANS = man3/slurm_hostlist_create.3 \ man3/slurm_kill_job.3 \ man3/slurm_kill_job_step.3 \ man3/slurm_load_ctl_conf.3 \ + man3/slurm_load_job.3 \ man3/slurm_load_jobs.3 \ man3/slurm_load_node.3 \ man3/slurm_load_partitions.3 \ diff --git a/doc/man/man1/sacctmgr.1 b/doc/man/man1/sacctmgr.1 index 5c29d74d5bf5e0989da6611e733b4b7a47bfad6c..01dad39b3c0fbb17077aa902e552b3d0f4220dcb 100644 --- a/doc/man/man1/sacctmgr.1 +++ b/doc/man/man1/sacctmgr.1 @@ -20,7 +20,7 @@ These parameters are \fIuser\fR, \fIcluster\fR, \fIpartition\fR, and the \fIClusterName\fR parameter in the \fIslurm.conf\fR configuration file. \fIpartition\fR is the name of a Slurm partition on that cluster. \fIaccount\fR is the bank account for a job. -The intended mode of operation is to initiate the \fBssacctmgr\fR command, +The intended mode of operation is to initiate the \fBsacctmgr\fR command, add, delete, modify, and/or list \fIassociation\fR records then commit the changes and exit. @@ -46,35 +46,35 @@ commit changes immediately. .TP \fB\-n\fR, \fB\-\-no_header\fR -Don't display header when listing results. +No header will be added to the beginning of the output. .TP \fB\-o\fR, \fB\-\-oneliner\fR -Print information one line per record. +Report output one record per line. This is equivalent to the \fBoneliner\fR command. .TP -\fB\-n\fR, \fB\-\-parsable\fR -Make output '|' delimited. +\fB\-p\fR, \fB\-\-parsable\fR +Output will be '|' delimited. .TP \fB\-q\fR, \fB\-\-quiet\fR -Print no warning or informational messages, only error messages. +Print no messages other than error messages. This is equivalent to the \fBquiet\fR command. .TP \fB\-s\fR, \fB\-\-associations\fR -Show an association for entities displayed. +Use with show or list to display associations with the entity. This is equivalent to the \fBassociations\fR command. .TP \fB\-v\fR, \fB\-\-verbose\fR -Print detailed event logging. +Enable detailed logging. This is equivalent to the \fBverbose\fR command. .TP \fB\-V\fR , \fB\-\-version\fR -Print version information and exit. +Display version number. This is equivalent to the \fBversion\fR command. 
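+
+.LP
+For example, a command of this form might be used to list accounts together
+with their associations, '|' delimited and without a header:
+.nf
+sacctmgr \-\-parsable \-\-no_header \-\-associations list account
+.fi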
.SH "COMMANDS" @@ -90,7 +90,7 @@ Identical to the \fBcreate\fR command. .TP \fBassociations\fR -Show associations for entities displayed. +Use with show or list to display associations with the entity. .TP \fBcreate\fR <\fIENTITY\fR> <\fISPECS\fR> @@ -103,7 +103,7 @@ Delete the specified entities. .TP \fBexit\fP -Terminate the execution of sacctmgr. +Terminate sacctmgr. Identical to the \fBquit\fR command. .TP @@ -122,15 +122,15 @@ Identical to the \fBshow\fR command. .TP \fBmodify\fR <\fIENTITY\fR> \fbwith\fR <\fISPECS\fR> \fbset\fR <\fISPECS\fR> -Modify an entities. +Modify an entity. .TP \fBoneliner\fP -Print information one line per record. +Output one record per line. .TP \fBquiet\fP -Print no warning or informational messages, only fatal error messages. +Print no messages other than error messages. .TP \fBquit\fP @@ -145,17 +145,17 @@ Identical to the \fBlist\fR command. .TP \fBverbose\fP -Print detailed event logging. +Enable detailed logging. This includes time\-stamps on data structures, record counts, etc. This is an independent command with no options meant for use in interactive mode. .TP \fBversion\fP -Display the version number of sacctmgr being executed. +Display the version number of sacctmgr. .TP \fB!!\fP -Repeat the last command executed. +Repeat the last command. .TP \fBENTITIES\fR @@ -179,6 +179,21 @@ The entity used to group information consisting of four parameters: The \fIClusterName\fR parameter in the \fIslurm.conf\fR configuration file, used to differentiate accounts from on different machines. +.TP +\fIcoordinator\fR +A special priviaged user usually an account manager or such that can +add users or sub accounts to the account they are coordinator over. +This should be a trusted person since they can change limits on +account and user associations inside their realm. + +.TP +\fIqos\fR +Quality of Service (For use with MOAB only). + +.TP +\fItransaction\fR +List of transactions that have occured during a given time period. + .TP \fIuser\fR The login name. @@ -188,6 +203,7 @@ The login name. .TP \fICluster\fP=<cluster> Specific cluster to add account to. Default is all in system. +.TP \fIDescription\fP=<description> An arbitrary string describing an account. .TP @@ -279,6 +295,24 @@ Quality of Service jobs are to run at for this account. Now consisting of Normal, Standby, Expedite, and Exempt. This is overridden if set directly on an account or user. +.TP +\fBSPECIFICATIONS FOR COORDINATOR\fR +.TP +\fIAccountsfP=<comma separated list of account names> +Account name to add this user as a coordinator to. +.TP +\fINames\fP=<comma separated list of user names> +Names of coordinators. + +.TP +\fBSPECIFICATIONS FOR QOS\fR +.TP +\fIDescription\fP=<description> +An arbitrary string describing an account. +.TP +\fINames\fP=<qos> +Names of qos. + .TP \fBSPECIFICATIONS FOR USERS\fR .TP diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 72a8bcdb59faa95ac8eb3d090dc780317ff03b39..356f916d006bd7c2accba7bb63bd135b23458205 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -1,4 +1,4 @@ -.TH "salloc" "1" "SLURM 1.3" "May 2008" "SLURM Commands" +.TH "salloc" "1" "SLURM 1.3" "July 2008" "SLURM Commands" .SH "NAME" .LP salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished. @@ -306,12 +306,24 @@ The default value is the username of the submitting user. .TP \fB\-\-mem\fR[=]<\fIMB\fR> Specify the real memory required per node in MegaBytes. 
-If a value is specified, that quantity of memory will be
-reserved for this job.
-If no value is specified and real memory is exhausted on
-any allocated node then the job is subject to cancellation.
-Also see \fB\-\-task\-mem\fR.
-
+Default value is \fBDefMemPerNode\fR and the maximum value is
+\fBMaxMemPerNode\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR).
+Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+
+.TP
+\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR>
+Minimum memory required per allocated CPU in MegaBytes.
+Default value is \fBDefMemPerCPU\fR and the maximum value is
+\fBMaxMemPerCPU\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
 
 .TP
 \fB\-\-mincores\fR[=]<\fIn\fR>
@@ -495,13 +507,6 @@ Acceptable time formats include "minutes", "minutes:seconds",
 "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
 "days\-hours:minutes:seconds".
 
-.TP
-\fB\-\-task\-mem\fR[=]<\fIMB\fR>
-Mimimum memory available per task in MegaBytes.
-Default value is \fBDefMemPerTask\fR and the maximum value is
-\fBMaxMemPerTask\fR, both of which can be seen using the
-\fBscontrol show config\fR command.
-
 .TP
 \fB\-\-tmp\fR[=]<\fIMB\fR>
 Specify a minimum amount of temporary disk space.
@@ -709,6 +714,7 @@ salloc \-N5 srun \-n10 myprogram
 .SH "COPYING"
 Copyright (C) 2006\-2007 The Regents of the University of California.
+Copyright (C) 2008 Lawrence Livermore National Security.
 Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 LLNL\-CODE\-402394.
 .LP
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index 03accb92bfec2f88a4b977225ab4a960d1835ae9..918f7843130e5d4f46cb0c461947af6ff4f40339 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -1,4 +1,4 @@
-.TH "sbatch" "1" "SLURM 1.3" "May 2008" "SLURM Commands"
+.TH "sbatch" "1" "SLURM 1.3" "July 2008" "SLURM Commands"
 .SH "NAME"
 .LP
 sbatch \- Submit a batch script to SLURM.
@@ -330,11 +330,24 @@ The default value is the username of the submitting user.
 .TP
 \fB\-\-mem\fR[=]<\fIMB\fR>
 Specify the real memory required per node in MegaBytes.
-If a value is specified, that quantity of memory will be
-reserved for this job.
-If no value is specified and real memory is exhausted on
-any allocated node then the job is subject to cancellation.
-Also see \fB\-\-task\-mem\fR.
+Default value is \fBDefMemPerNode\fR and the maximum value is
+\fBMaxMemPerNode\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR).
+Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+
+.TP
+\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR>
+Minimum memory required per allocated CPU in MegaBytes.
+Default value is \fBDefMemPerCPU\fR and the maximum value is
+\fBMaxMemPerCPU\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
 
 .TP
 \fB\-\-mincores\fR[=]<\fIn\fR>
@@ -582,13 +595,6 @@ Acceptable time formats include "minutes", "minutes:seconds",
 "hours:minutes:seconds", "days\-hours", "days\-hours:minutes" and
 "days\-hours:minutes:seconds".
 
-.TP
-\fB\-\-task\-mem\fR[=]<\fIMB\fR>
-Mimimum memory available per task in MegaBytes.
-Default value is \fBDefMemPerTask\fR and the maximum value is
-\fBMaxMemPerTask\fR, both of which can be seen using the
-\fBscontrol show config\fR command.
-
 .TP
 \fB\-\-tasks\-per\-node\fR[=]<\fIn\fR>
 Specify the number of tasks to be launched per node.
@@ -867,6 +873,7 @@ host4
 .SH "COPYING"
 Copyright (C) 2006\-2007 The Regents of the University of California.
+Copyright (C) 2008 Lawrence Livermore National Security.
 Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 LLNL\-CODE\-402394.
 .LP
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index afe14ad594fa4022d3e53ab6be8394657f0ed267..5aca020e0194eff6dcf6d90eb4b7e9943694515f 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1,6 +1,4 @@
-.\" $Id: srun.1 14123 2008-05-23 20:22:46Z jette $
-.\"
-.TH SRUN "1" "May 2008" "srun 1.3" "slurm components"
+.TH SRUN "1" "July 2008" "srun 1.3" "slurm components"
 .SH "NAME"
 srun \- run parallel jobs
 
@@ -425,11 +423,24 @@ The default value is the submitting user.
 .TP
 \fB\-\-mem\fR[=]<\fIMB\fR>
 Specify the real memory required per node in MegaBytes.
-If a value is specified, that quantity of memory will be
-reserved for this job.
-If no value is specified and real memory is exhausted on
-any allocated node then the job is subject to cancellation.
-Also see \fB\-\-task\-mem\fR.
+Default value is \fBDefMemPerNode\fR and the maximum value is
+\fBMaxMemPerNode\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if whole nodes
+are allocated to jobs (\fBSelectType=select/linear\fR).
+Also see \fB\-\-mem\-per\-cpu\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
+
+.TP
+\fB\-\-mem\-per\-cpu\fR[=]<\fIMB\fR>
+Minimum memory required per allocated CPU in MegaBytes.
+Default value is \fBDefMemPerCPU\fR and the maximum value is
+\fBMaxMemPerCPU\fR. If configured, both parameters can be
+seen using the \fBscontrol show config\fR command.
+This parameter would generally be used if individual processors
+are allocated to jobs (\fBSelectType=select/cons_res\fR).
+Also see \fB\-\-mem\fR.
+\fB\-\-mem\fR and \fB\-\-mem\-per\-cpu\fR are mutually exclusive.
 
 .TP
 \fB\-\-mem_bind\fR=[{\fIquiet,verbose\fR},]\fItype\fR
@@ -843,13 +854,6 @@ in slurm.conf is executed. This is meant to be a very short\-lived
 program. If it fails to terminate within a few seconds, it will be killed
 along with any descendant processes.
 
-.TP
-\fB\-\-task\-mem\fR[=]<\fIMB\fR>
-Mimimum memory available per task in MegaBytes.
-Default value is \fBDefMemPerTask\fR and the maximum value is
-\fBMaxMemPerTask\fR, both of which can be seen using the
-\fBscontrol show config\fR command.
-
 .TP
 \fB\-\-task\-prolog\fR=\fIexecutable\fR
 The \fBslurmd\fR daemon will run \fIexecutable\fR just before launching
@@ -1624,6 +1628,7 @@ wait
 .SH "COPYING"
 Copyright (C) 2006\-2007 The Regents of the University of California.
+Copyright (C) 2008 Lawrence Livermore National Security.
 Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 LLNL\-CODE\-402394.
.LP diff --git a/doc/man/man3/slurm_checkpoint_error.3 b/doc/man/man3/slurm_checkpoint_error.3 index 3a5fcf213939dd1625f6e33a5ae78a919e99f4e3..f79cb65ece85f656d12f8399a1863cd2ddd7764c 100644 --- a/doc/man/man3/slurm_checkpoint_error.3 +++ b/doc/man/man3/slurm_checkpoint_error.3 @@ -134,14 +134,14 @@ This can be issued as needed to prevent checkpointing while a job step is in a critical section or for other reasons. .LP \fBslurm_checkpoint_enable\fR -Make the indentified job step checkpointable. +Make the identified job step checkpointable. .LP \fBslurm_checkpoint_error\fR Get error information about the last checkpoint operation for a given job step. .LP \fBslurm_checkpoint_restart\fR Request that a previously checkpointed job resume execution. -It may continue execution on differrent nodes than were +It may continue execution on different nodes than were originally used. Execution may be delayed if resources are not immediately available. diff --git a/doc/man/man3/slurm_free_job_info_msg.3 b/doc/man/man3/slurm_free_job_info_msg.3 index 4471ae0cb5a724fc48f8db7bbbf466a1d0227e77..c31031800d449891115eb33dd31b072d6ef8fccf 100644 --- a/doc/man/man3/slurm_free_job_info_msg.3 +++ b/doc/man/man3/slurm_free_job_info_msg.3 @@ -25,6 +25,15 @@ void \fBslurm_free_job_info_msg\fR ( .br ); .LP +int \fBslurm_load_job\fR ( +.br + job_info_msg_t **\fIjob_info_msg_pptr\fP, +.br + uint32_t \fIjob_id\fP +.br +); +.LP +.LP int \fBslurm_load_jobs\fR ( .br time_t \fIupdate_time\fP, @@ -53,7 +62,7 @@ int \fBslurm_get_end_time\fR ( .LP long \fBslurm_get_rem_time\fR ( .br - uint32_t \fIjobid\fP + uint32_t \fIjob_id\fP .br ); .LP @@ -136,11 +145,13 @@ Specifies a pointer to a storage location into which a Slurm job id may be placed. .TP \fIjob_info_msg_ptr\fP -Specifies the pointer to the structure created by \fBslurm_load_jobs\fR. +Specifies the pointer to the structure created by \fBslurm_load_job\fR +or \fBslurm_load_jobs\fR. .TP \fIjobinfo\fP Job\-specific information as constructed by Slurm's NodeSelect plugin. -This data object is returned for each job by the \fBslurm_load_jobs\fR function. +This data object is returned for each job by the \fBslurm_load_job\fR or +\fBslurm_load_jobs\fR function. .TP \fIjob_pid\fP Specifies a process id of some process on the current node. @@ -183,6 +194,9 @@ expected termination time of a specified SLURM job id. The time corresponds to the exhaustion of the job\'s or partition\'s time limit. NOTE: The data is cached locally and only retrieved from the SLURM controller once per minute. .LP +\fBslurm_load_job\fR Returns a job_info_msg_t that contains an update time, +record count, and array of job_table records for some specific job ID. +.LP \fBslurm_load_jobs\fR Returns a job_info_msg_t that contains an update time, record count, and array of job_table records for all jobs. .LP @@ -331,6 +345,7 @@ expressions into a collection of individual node names. .SH "COPYING" Copyright (C) 2002\-2006 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394. 
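+.LP
+A minimal sketch of calling the new \fBslurm_load_job\fR function follows
+(error handling is trimmed; \fIjob_id\fP is the job ID of interest, and the
+\fIrecord_count\fR and \fIjob_array\fR members of \fIjob_info_msg_t\fR are
+assumed):
+.nf
+job_info_msg_t *job_info_msg = NULL;
+int i;
+
+if (slurm_load_job(&job_info_msg, job_id) != SLURM_SUCCESS)
+	slurm_perror("slurm_load_job");
+else {
+	for (i = 0; i < job_info_msg->record_count; i++)
+		slurm_print_job_info(stdout,
+			&job_info_msg->job_array[i], 0);
+	slurm_free_job_info_msg(job_info_msg);
+}
+.fi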
.LP diff --git a/doc/man/man3/slurm_load_job.3 b/doc/man/man3/slurm_load_job.3 new file mode 100644 index 0000000000000000000000000000000000000000..836ffa79b478c25f79af77cc002dc816f26891c0 --- /dev/null +++ b/doc/man/man3/slurm_load_job.3 @@ -0,0 +1 @@ +.so man3/slurm_free_job_info_msg.3 diff --git a/doc/man/man3/slurm_step_ctx_create.3 b/doc/man/man3/slurm_step_ctx_create.3 index 76f6cff977427c451794c568dc445e093eb4c263..c949a221f20c39701e29f3586683216520db4ea2 100644 --- a/doc/man/man3/slurm_step_ctx_create.3 +++ b/doc/man/man3/slurm_step_ctx_create.3 @@ -126,7 +126,7 @@ the second of type char **. \fBSLURM_STEP_CTX_CHDIR\fR Have the remote process change directory to the specified location before beginning execution. Accepts one argument of type -char * indentifying the directory's pathname. By default +char * identifying the directory's pathname. By default the remote process will execute in the same directory pathname from which it is spawned. NOTE: This assumes that same directory pathname exists on the other nodes. diff --git a/doc/man/man3/slurm_step_launch.3 b/doc/man/man3/slurm_step_launch.3 index cda4e827e77415b7076d3ad995337f4ccd609703..5434d9f81bbaad8512b5ff3dd1a41574defedd4e 100644 --- a/doc/man/man3/slurm_step_launch.3 +++ b/doc/man/man3/slurm_step_launch.3 @@ -1,4 +1,4 @@ -.TH "Slurm API" "3" "December 2006" "Morris Jette" "Slurm job step launch functions" +.TH "Slurm API" "3" "July 2008" "Morris Jette" "Slurm job step launch functions" .SH "NAME" @@ -19,6 +19,8 @@ void \fBslurm_step_launch_params_t_init\fR ( int \fBslurm_step_launch\fR ( .br slurm_step_ctx \fIctx\fP, +.br + char * \fIlauncher_host\fP, .br const slurm_step_launch_params_t *\fIlaunch_req\fP, .br @@ -57,6 +59,12 @@ function calls, and destroyed by \fBslurm_step_ctx_destroy\fR. \fIlaunch_req\fP Pointer to a structure allocated by the user containing specifications of the job step to be launched. +.TP +\fIlauncher_host\fP +Host name or address to be used to identify the destination of PMI communications +for MPICH2. We intend to embed this information within \fIlaunch_req\fP in the +next major release of SLURM, when changes to the protocol can be more easily +addressed. .SH "DESCRIPTION" .LP @@ -74,6 +82,40 @@ default values. This function will NOT allocate any new memory. \fBslurm_step_launch_abort\fR Abort an in-progress launch, or terminate the fully launched job step. Can be called from a signal handler. +.SH "IO Redirection" +.LP +Use the \fIlocal_fds\fR entry in \fIslurm_step_launch_params_t\fR +to specify file descriptors to be used for standard input, output +and error. Any \fIlocal_fds\fR not specified will result in the launched +tasks using the calling process's standard input, output and error. +Threads created by \fBslurm_step_launch\fR will completely handle +copying data between the remote processes and the specified local file +descriptors. +.LP +Use the substructure in \fIslurm_step_io_fds_t\fR to restrict the +redirection of I/O to a specific node or task ID. For example, to +redirect standard output only from task 0, set +.LP +.nf +params.local_fs.out.taskid=0; +.fi +.LP +Use the \fIremote_*_filename\fR fields in \fIslurm_step_launch_params_t\fR +to have launched tasks read and/or write directly to local files +rather than transferring data over the network to the calling process. +These strings support many of the same format options as the \fBsrun\fR +command. Any \fIremote_*_filename\fR fields set will supersede the +corresponding \fIlocal_fds\fR entries. 
For example, the following +code will direct each task to write standard output and standard +error to local files with names containing the task ID (e.g. +"/home/bob/test_output/run1.out.0" and +"/home/bob/test_output/run.1.err.0" for task 0). +.LP +.nf +params.remote_output_filename = "/home/bob/test_output/run1.out.%t" +params.remote_error_filename = "/home/bob/test_output/run1.err.%t" +.fi + .SH "RETURN VALUE" .LP \fBslurm_step_launch\fR and \fBslurm_step_launch_wait_start\fR @@ -154,7 +196,7 @@ int main (int argc, char *argv[]) params.argv = argv + 1; callbacks.task_start = _task_start; callbacks.task_finish = _task_finish; - if (slurm_step_launch(step_ctx, ¶ms, &callbacks) + if (slurm_step_launch(step_ctx, NULL, ¶ms, &callbacks) != SLURM_SUCCESS) { slurm_perror("slurm_step_launch"); exit(1); @@ -181,7 +223,8 @@ which must be linked to your process for use (e.g. "cc \-lslurm myprog.c"). .SH "COPYING" -Copyright (C) 2006 The Regents of the University of California. +Copyright (C) 2006-2007 The Regents of the University of California. +Copyright (C) 2008 Lawrence Livermore National Security. Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). LLNL\-CODE\-402394. .LP diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 1d908839d3617961e8332bfce4b5b0734f8ddb19..04f5b0b9ae57dc15517625f209cf38c6e9813a15 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1,4 +1,4 @@ -.TH "slurm.conf" "5" "June 2008" "slurm.conf 1.3" "Slurm configuration file" +.TH "slurm.conf" "5" "July 2008" "slurm.conf 1.3" "Slurm configuration file" .SH "NAME" slurm.conf \- Slurm configuration file @@ -136,7 +136,7 @@ Name that \fBBackupController\fR should be referred to in establishing a communications path. This name will be used as an argument to the gethostbyname() function for identification. For example, "elx0000" might be used to designate -the ethernet address for node "lx0000". +the Ethernet address for node "lx0000". By default the \fBBackupAddr\fR will be identical in value to \fBBackupController\fR. @@ -182,7 +182,7 @@ Name that \fBControlMachine\fR should be referred to in establishing a communications path. This name will be used as an argument to the gethostbyname() function for identification. For example, "elx0000" might be used to designate -the ethernet address for node "lx0000". +the Ethernet address for node "lx0000". By default the \fBControlAddr\fR will be identical in value to \fBControlMachine\fR. @@ -208,11 +208,25 @@ License (GPL). The default value is "crypto/openssl". .TP -\fBDefMemPerTask\fR -Default real memory size available per task in MegaBytes. +\fBDefMemPerCPU\fR +Default real memory size available per allocated CPU in MegaBytes. Used to avoid over\-subscribing memory and causing paging. -Also see \fBMaxMemPerTask\fR. +\fBDefMemPerCPU\fR would generally be used if individual processors +are alocated to jobs (\fBSelectType=select/cons_res\fR). The default value is 0 (unlimited). +Also see \fBDefMemPerNode\fR and \fBMaxMemPerCPU\fR. +\fBDefMemPerCPU\fR and \fBDefMemPerNode\fR are mutually exclusive. + +.TP +\fBDefMemPerNode\fR +Default real memory size available per allocated node in MegaBytes. +Used to avoid over\-subscribing memory and causing paging. +\fBDefMemPerNode\fR would generally be used if whole nodes +are alocated to jobs (\fBSelectType=select/linear\fR) and +resources are shared (\fBShared=yes\fR or \fBShared=force\fR). +The default value is 0 (unlimited). +Also see \fBDefMemPerCPU\fR and \fBMaxMemPerNode\fR. 
+\fBDefMemPerCPU\fR and \fBDefMemPerNode\fR are mutually exclusive. .TP \fBDefaultStorageHost\fR @@ -431,9 +445,11 @@ Also see \fBDefaultStoragePort\fR. \fBJobCompType\fR Define the job completion logging mechanism type. Acceptable values at present include "jobcomp/none", "jobcomp/filetxt", -"jobcomp/mysql", "jobcomp/pgsql", "jobcomp/script"and "jobcomp/slurmdbd". +"jobcomp/mysql", "jobcomp/pgsql", and "jobcomp/script"". The default value is "jobcomp/none", which means that upon job completion -the record of the job is purged from the system. +the record of the job is purged from the system. If using the accounting +infrastructure this plugin may not be of interest since the information +here is redundant. The value "jobcomp/filetxt" indicates that a record of the job should be written to a text file specified by the \fBJobCompLoc\fR parameter. The value "jobcomp/mysql" indicates that a record of the job should be @@ -443,10 +459,6 @@ written to a postgresql database specified by the \fBJobCompLoc\fR parameter. The value "jobcomp/script" indicates that a script specified by the \fBJobCompLoc\fR parameter is to be executed with environment variables indicating the job information. -The value "jobcomp/slurmdbd" indicates that job completion records -will be written to SlurmDbd, which maintains its own database. See -"man slurmdbd" for more information. -Also see \fBDefaultStorageType\fR. .TP \fBJobCompUser\fR @@ -525,11 +537,25 @@ of the slurmctld daemon. May not exceed 65533. .TP -\fBMaxMemPerTask\fR -Maximum real memory size available per task in MegaBytes. +\fBMaxMemPerCPU\fR +Maximum real memory size available per allocated CPU in MegaBytes. +Used to avoid over\-subscribing memory and causing paging. +\fBMaxMemPerCPU\fR would generally be used if individual processors +are alocated to jobs (\fBSelectType=select/cons_res\fR). +The default value is 0 (unlimited). +Also see \fBDefMemPerCPU\fR and \fBMaxMemPerNode\fR. +\fBMaxMemPerCPU\fR and \fBMaxMemPerNode\fR are mutually exclusive. + +.TP +\fBMaxMemPerNode\fR +Maximum real memory size available per allocated node in MegaBytes. Used to avoid over\-subscribing memory and causing paging. -Also see \fBDefMemPerTask\fR. +\fBMaxMemPerNode\fR would generally be used if whole nodes +are alocated to jobs (\fBSelectType=select/linear\fR) and +resources are shared (\fBShared=yes\fR or \fBShared=force\fR). The default value is 0 (unlimited). +Also see \fBDefMemPerNode\fR and \fBMaxMemPerCPU\fR. +\fBMaxMemPerCPU\fR and \fBMaxMemPerNode\fR are mutually exclusive. .TP \fBMessageTimeout\fR @@ -576,10 +602,21 @@ on SPANK plugins, see the \fBspank\fR(8) manual. .TP \fBPrivateData\fR -If non-zero then users are unable to view jobs or job steps belonging -to other users (except for SlurmUser or root, who can view all jobs). -The default value is "0", permitting any user to view any jobs or -job steps. +This controls what type of information is hidden from regular users. +By default, all information is visible to all users. +User \fBSlurmUser\fR and root can always view all information. +Multiple values may be specified with a comma separator. +Acceptable values include: +.RS +.TP +\fBjobs\fR prevents users from viewing jobs or job steps belonging +to other users. +.TP +\fBnodes\fR prevents users from viewing node state information. +.TP +\fBpartitions\fR prevents users from viewing partition state information. +.RE + .TP \fBProctrackType\fR @@ -835,22 +872,26 @@ On single\-core systems, each CPUs will be considered a CPU. 
.TP \fBCR_CPU_Memory\fR CPUs and memory are consumable resources. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_Core\fR Cores are consumable resources. .TP \fBCR_Core_Memory\fR Cores and memory are consumable resources. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_Socket\fR Sockets are consumable resources. .TP \fBCR_Socket_Memory\fR Memory and CPUs are consumable resources. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .TP \fBCR_Memory\fR Memory is a consumable resource. NOTE: This implies \fIShared=YES\fR or \fIShared=FORCE\fR for all partitions. +Setting a value for \fBDefMemPerCPU\fR is strongly recommended. .RE .TP @@ -1405,7 +1446,16 @@ memory nodes if either will satisfy a job's requirements. The units of weight are arbitrary, but larger weights should be assigned to nodes with more processors, memory, disk space, higher processor speed, etc. -Weight is an integer value with a default value of 1. +Note that if a job allocation request can not be satisfied +using the nodes with the lowest weight, the set of nodes +with the next lowest weight is added to the set of nodes +under consideration for use (repeat as needed for higher +weight values). If you absolutely want to minimize the number +of higher weight nodes allocated to a job (at a cost of higher +scheduling overhead), give each node a distinct \fBWeight\fR +value and they will be added to the pool of nodes being +considered for scheduling individually. +The default value is 1. .LP The "DownNodes=" configuration permits you to mark certain nodes as in a DOWN, DRAIN, FAIL, or FAILING state without altering the permanent @@ -1575,7 +1625,7 @@ Possible values for \fBShared\fR are "EXCLUSIVE", "FORCE", "YES", and "NO". .RS .TP 12 \fBEXCLUSIVE\fR -Aallocates entire nodes to jobs even with select/cons_res configured. +Allocates entire nodes to jobs even with select/cons_res configured. This can be used to allocate whole nodes in some partitions and individual processors in other partitions. .TP @@ -1592,7 +1642,7 @@ with gang scheduling (\fBSchedulerType=sched/gang\fR). .TP \fBYES\fR Make nodes in the partition available for sharing, but provides -the user with a means of getting dediated resources. +the user with a means of getting dedicated resources. If \fBSelectType=select/cons_res\fR, then resources will be over\-subscribed unless explicitly disabled in the job submit request using the "\-\-exclusive" option. diff --git a/doc/man/man8/spank.8 b/doc/man/man8/spank.8 index 7813a6d646df0c26124810c0c6e020bd23b66542..2ee7e5886d274048c78f9c153b69160b00340476 100644 --- a/doc/man/man8/spank.8 +++ b/doc/man/man8/spank.8 @@ -1,4 +1,4 @@ -.TH "SPANK" "8" "May 2006" "SPANK" "SLURM plug\-in architecture for Node and job (K)control" +.TH "SPANK" "8" "Jul 2008" "SPANK" "SLURM plug\-in architecture for Node and job (K)control" .SH "NAME" \fBSPANK\fR \- SLURM Plug\-in Architecture for Node and job (K)control @@ -34,9 +34,17 @@ Plugins may query the context in which they are running with the launch. A plugin may define the following functions: .TP 2 \fBslurm_spank_init\fR -Called just after plugins are loaded. In remote context, this is -just after job step is initialized. For local context, this is before -user options are processed. +Called just after plugins are loaded. In remote context, this is just +after job step is initialized. This function is called before any plugin +option processing. 
+.TP +\fBslurm_spank_init_post_opt\fR +Called at the same point as \fBslurm_spank_init\fR, but after all +user options to the plugin have been processed. The reason that the +\fBinit\fR and \fBinit_post_opt\fR callbacks are separated is so that +plugins can process system-wide options specified in plugstack.conf in +the \fBinit\fR callback, then process user options, and finaly take some +action in \fBslurm_spank_init_post_opt\fR if necessary. .TP \fBslurm_spank_local_user_init\fR Called in local (\fBsrun\fR or \fBsbatch\fR) context only after all diff --git a/slurm.spec b/slurm.spec index ba9bc65bea7f458a8f142b0b57bb6155827ac616..69ffeefa3a045113c9d4aa114499a278a531c600 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,4 +1,4 @@ -# $Id: slurm.spec 14366 2008-06-26 20:04:12Z da $ +# $Id: slurm.spec 14616 2008-07-23 22:28:22Z jette $ # # Note that this package is not relocatable @@ -62,7 +62,7 @@ %slurm_with_opt aix %endif -# Build with sgijob, elan, and mysql plugins on CHAOS systems +# Build with sgijob, and mysql plugins on CHAOS systems %if %{?chaos}0 %slurm_with_opt mysql %slurm_with_opt sgijob @@ -71,14 +71,14 @@ %endif Name: slurm -Version: 1.3.5 -Release: 1%{?dist} +Version: 1.3.6 +Release: 1 Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.3.5.tar.bz2 +Source: slurm-1.3.6.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -249,7 +249,7 @@ SLURM process tracking plugin for SGI job containers. ############################################################################# %prep -%setup -n slurm-1.3.5 +%setup -n slurm-1.3.6 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ @@ -439,7 +439,6 @@ rm -rf $RPM_BUILD_ROOT %{_libdir}/slurm/jobcomp_mysql.so %{_libdir}/slurm/jobcomp_pgsql.so %{_libdir}/slurm/jobcomp_script.so -%{_libdir}/slurm/jobcomp_slurmdbd.so %{_libdir}/slurm/proctrack_pgid.so %{_libdir}/slurm/proctrack_linuxproc.so %{_libdir}/slurm/sched_backfill.so diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 6f5573ed26a6f3fb5c1aee5814d8c9988c2d95a0..c48aa5d11ffd70518563ec4b66f8489d603fe411 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -213,7 +213,7 @@ enum job_state_reason { /* Reasons for job to be pending */ WAIT_NO_REASON = 0, /* not set or job not pending */ WAIT_PRIORITY, /* higher priority jobs exist */ - WAIT_DEPENDENCY, /* depedent job has not completed */ + WAIT_DEPENDENCY, /* dependent job has not completed */ WAIT_RESOURCES, /* required resources not available */ WAIT_PART_NODE_LIMIT, /* request exceeds partition node limit */ WAIT_PART_TIME_LIMIT, /* request exceeds partition time limit */ @@ -448,8 +448,13 @@ typedef enum select_type_plugin_info { #define TASK_PARAM_CPUSETS 0x0001 #define TASK_PARAM_SCHED 0x0002 +#define MEM_PER_CPU 0x80000000 #define SHARED_FORCE 0x8000 +#define PRIVATE_DATA_JOBS 0x0001 /* job/step data is private */ +#define PRIVATE_DATA_NODES 0x0002 /* node data is private */ +#define PRIVATE_DATA_PARTITIONS 0x0004 /* partition data is private */ + /*****************************************************************************\ * PROTOCOL DATA STRUCTURE DEFINITIONS \*****************************************************************************/ @@ -528,7 +533,9 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint16_t job_min_sockets; /* minimum sockets per node, default=0 */ uint16_t job_min_cores; /* minimum cores per processor, default=0 */ uint16_t 
job_min_threads; /* minimum threads per core, default=0 */ - uint32_t job_min_memory; /* minimum real memory per node, default=0 */ + uint32_t job_min_memory; /* minimum real memory per node OR + * real memory per CPU | MEM_PER_CPU, + * default=0 (no limit) */ uint32_t job_min_tmp_disk; /* minimum tmp disk per node, default=0 */ uint32_t num_procs; /* total count of processors required, * default=0 */ @@ -990,7 +997,8 @@ typedef struct slurm_ctl_conf { bluegene clusters NULL otherwise */ char *plugindir; /* pathname to plugins */ char *plugstack; /* pathname to plugin stack config file */ - uint16_t private_data; /* block viewing of other user jobs */ + uint16_t private_data; /* block viewing of information, + * see PRIVATE_DATA_* */ char *proctrack_type; /* process tracking plugin type */ char *prolog; /* pathname of job prolog */ uint16_t propagate_prio_process; /* 1 if process priority should @@ -1539,6 +1547,15 @@ void slurm_print_slurmd_status PARAMS(( * SLURM JOB CONTROL CONFIGURATION READ/PRINT/UPDATE FUNCTIONS \*****************************************************************************/ +/* + * slurm_load_job - issue RPC to get job information for one job ID + * IN job_info_msg_pptr - place to store a job configuration pointer + * IN job_id - ID of job we want information about + * RET 0 or -1 on error + * NOTE: free the response using slurm_free_job_info_msg + */ +extern int slurm_load_job PARAMS((job_info_msg_t **resp, uint32_t job_id)); + /* * slurm_load_jobs - issue RPC to get slurm all job configuration * information if changed since update_time diff --git a/slurm/spank.h b/slurm/spank.h index 27f2a7f9cb8484d76c93c26bff314fb801da3473..5ef0a5eb10a59a783812b57e59c10369f859d282 100644 --- a/slurm/spank.h +++ b/slurm/spank.h @@ -1,7 +1,8 @@ /*****************************************************************************\ * spank.h - Stackable Plug-in Architecture for Node job Kontrol ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * LLNL-CODE-402394. * @@ -61,6 +62,8 @@ typedef int (spank_f) (spank_t spank, int ac, char *argv[]); * * slurmd -> slurmstepd * `-> init () + * -> process spank options + * -> init_post_opt () * + drop privileges (initgroups(), seteuid(), chdir()) * `-> user_init () * + for each task @@ -82,6 +85,7 @@ typedef int (spank_f) (spank_t spank, int ac, char *argv[]); */ extern spank_f slurm_spank_init; +extern spank_f slurm_spank_init_post_opt; extern spank_f slurm_spank_local_user_init; extern spank_f slurm_spank_user_init; extern spank_f slurm_spank_task_init; @@ -127,7 +131,13 @@ enum spank_item { S_JOB_PID_TO_LOCAL_ID, /* local task id from pid (pid_t, uint32_t *) */ S_JOB_LOCAL_TO_GLOBAL_ID,/* local id to global id (uint32_t, uint32_t *) */ S_JOB_GLOBAL_TO_LOCAL_ID,/* global id to local id (uint32_t, uint32_t *) */ - S_JOB_SUPPLEMENTARY_GIDS /* Array of suppl. gids (gid_t **, int *) */ + S_JOB_SUPPLEMENTARY_GIDS,/* Array of suppl. 
gids (gid_t **, int *) */ + S_SLURM_VERSION, /* Current slurm version (char **) */ + S_SLURM_VERSION_MAJOR, /* Current slurm version major release (char **) */ + S_SLURM_VERSION_MINOR, /* Current slurm version minor release (char **) */ + S_SLURM_VERSION_MICRO, /* Current slurm version micro release (char **) */ + S_STEP_CPUS_PER_TASK /* CPUs allocated per task (=1 if --overcommit + * option is used, uint32_t *) */ }; typedef enum spank_item spank_item_t; @@ -216,8 +226,9 @@ int spank_remote (spank_t spank); /* Get the value for the current job or task item specified, * storing the result in the subsequent pointer argument(s). * Refer to the spank_item_t comments for argument types. - * For S_JOB_ARGV and S_JOB_ENV items the result returned to - * the caller should not be freed or modified. + * For S_JOB_ARGV, S_JOB_ENV, and S_SLURM_VERSION* items + * the result returned to the caller should not be freed or + * modified. * * Returns ESPANK_SUCCESS on success, ESPANK_NOTASK if an S_TASK* * item is requested from outside a task context, ESPANK_BAD_ARG diff --git a/src/api/Makefile.am b/src/api/Makefile.am index b5634c6e73cd9eecbd9f936d734152f0da03e775..0000a85496aac10ea3be8d564e8b3b7a3a5a4f67 100644 --- a/src/api/Makefile.am +++ b/src/api/Makefile.am @@ -53,9 +53,15 @@ lib_LTLIBRARIES = libslurm.la libpmi.la BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la # Note that libslurmhelper is mostly the same as libslurm, except that -# it exports ALL symbols, including those from libcommon, libeio, etc. -# Also, libslurmhelper is a convenience library, it is not installed. +# it exports ALL symbols used by the process, libcommon, libeio, etc. +# Only link with libslurmhelper if you are sure you are not going to be +# loading a plugin that could use something you yourself are not +# calling from here. +# libslurm.o only contains all the api symbols and will export +# them to plugins that are loaded. +# Also, libslurmhelper, libslurm.o are for convenience, they are not installed. 
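The spank.h changes above add the slurm_spank_init_post_opt callback, the S_SLURM_VERSION* items, and S_STEP_CPUS_PER_TASK. A minimal plugin sketch of how those pieces fit together (illustrative only; the plugin name and log messages are hypothetical, while the callback signature and spank_get_item() items follow the header above):

/* version_post_opt.c - illustrative SPANK plugin sketch (hypothetical name) */
#include <slurm/spank.h>

SPANK_PLUGIN(version_post_opt, 1);

/* Runs after all user options have been processed (see call order above). */
int slurm_spank_init_post_opt(spank_t sp, int ac, char **av)
{
	char *version = NULL;
	uint32_t cpus_per_task = 0;

	/* The S_SLURM_VERSION* items are valid in any context and the
	 * returned string must not be freed or modified. */
	if (spank_get_item(sp, S_SLURM_VERSION, &version) == ESPANK_SUCCESS)
		slurm_info("running under SLURM %s", version);

	/* S_STEP_CPUS_PER_TASK is only meaningful in remote context. */
	if (spank_remote(sp) &&
	    (spank_get_item(sp, S_STEP_CPUS_PER_TASK, &cpus_per_task)
	     == ESPANK_SUCCESS))
		slurm_info("step was allocated %u CPUs per task", cpus_per_task);

	return 0;
}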
noinst_LTLIBRARIES = libslurmhelper.la
+noinst_PROGRAMS = libslurm.o
slurmapi_src = \
allocate.c \
@@ -84,9 +90,9 @@ slurmapi_src = \
common_dir = $(top_builddir)/src/common
-slurmapi_add = \
+slurmapi_add = \
$(common_dir)/libcommon.la \
- $(common_dir)/libspank.la \
+ $(common_dir)/libspank.la \
$(common_dir)/libeio.la \
-lpthread
@@ -96,7 +102,8 @@ libslurmhelper_la_LDFLAGs = \
$(LIB_LDFLAGS) \
-version-info $(current):$(rev):$(age)
-convenience_libs = $(top_builddir)/src/api/libslurmhelper.la
+convenience_libs = \
+ $(top_builddir)/src/api/libslurmhelper.la
libslurm_la_SOURCES =
libslurm_la_LIBADD = $(convenience_libs)
@@ -115,8 +122,20 @@ libpmi_la_LIBADD = $(top_builddir)/src/api/libslurm.la
libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \
$(PMI_OTHER_FLAGS)
+# This was made so we could export all symbols from libcommon
+# on multiple platforms
+libslurm_o_SOURCES =
+
+libslurm_o_LDADD = $(top_builddir)/src/common/libeio.o \
+ $(top_builddir)/src/common/libspank.o \
+ $(top_builddir)/src/common/libcommon.o
+
+libslurm.o: $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD)
+ $(LINK) $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD)
+
+
force:
-$(convenience_libs) : force
+$(libslurm_o_LDADD) $(convenience_libs) : force
@cd `dirname $@` && $(MAKE) `basename $@`
diff --git a/src/api/Makefile.in b/src/api/Makefile.in
index 107f973d111487e627ac6046c649c4bee579f09b..59d5d3dc068d20cfe96c3327476d388c95cfc27d 100644
--- a/src/api/Makefile.in
+++ b/src/api/Makefile.in
@@ -17,6 +17,7 @@
#
# Makefile for slurm API library
+
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
@@ -36,6 +37,7 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
+noinst_PROGRAMS = libslurm.o$(EXEEXT)
subdir = src/api
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -99,6 +101,12 @@ am__objects_1 = allocate.lo allocate_msg.lo cancel.lo checkpoint.lo \
triggers.lo reconfigure.lo update_config.lo
am_libslurmhelper_la_OBJECTS = $(am__objects_1)
libslurmhelper_la_OBJECTS = $(am_libslurmhelper_la_OBJECTS)
+PROGRAMS = $(noinst_PROGRAMS)
+am_libslurm_o_OBJECTS =
+libslurm_o_OBJECTS = $(am_libslurm_o_OBJECTS)
+libslurm_o_DEPENDENCIES = $(top_builddir)/src/common/libeio.o \
+ $(top_builddir)/src/common/libspank.o \
+ $(top_builddir)/src/common/libcommon.o
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
am__depfiles_maybe = depfiles
@@ -112,9 +120,9 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libpmi_la_SOURCES) $(libslurm_la_SOURCES) \
- $(libslurmhelper_la_SOURCES)
+ $(libslurmhelper_la_SOURCES) $(libslurm_o_SOURCES)
DIST_SOURCES = $(libpmi_la_SOURCES) $(libslurm_la_SOURCES) \
- $(libslurmhelper_la_SOURCES)
+ $(libslurmhelper_la_SOURCES) $(libslurm_o_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -336,8 +344,13 @@ lib_LTLIBRARIES = libslurm.la libpmi.la
BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la
# Note that libslurmhelper is mostly the same as libslurm, except that
-# it exports ALL symbols, including those from libcommon, libeio, etc.
-# Also, libslurmhelper is a convenience library, it is not installed.
+# it exports ALL symbols used by the process, libcommon, libeio, etc.
+# Only link with libslurmhelper if you are sure you are not going to be
+# loading a plugin that could use something you yourself are not
+# calling from here.
+# libslurm.o only contains all the api symbols and will export
+# them to plugins that are loaded.
+# Also, libslurmhelper, libslurm.o are for convenience, they are not installed.
noinst_LTLIBRARIES = libslurmhelper.la
slurmapi_src = \
allocate.c \
@@ -367,7 +380,7 @@ slurmapi_src = \
common_dir = $(top_builddir)/src/common
slurmapi_add = \
$(common_dir)/libcommon.la \
- $(common_dir)/libspank.la \
+ $(common_dir)/libspank.la \
$(common_dir)/libeio.la \
-lpthread
@@ -377,7 +390,9 @@ libslurmhelper_la_LDFLAGs = \
$(LIB_LDFLAGS) \
-version-info $(current):$(rev):$(age)
-convenience_libs = $(top_builddir)/src/api/libslurmhelper.la
+convenience_libs = \
+ $(top_builddir)/src/api/libslurmhelper.la
+
libslurm_la_SOURCES =
libslurm_la_LIBADD = $(convenience_libs)
libslurm_la_LDFLAGS = \
@@ -396,6 +411,14 @@ libpmi_la_LIBADD = $(top_builddir)/src/api/libslurm.la
libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \
$(PMI_OTHER_FLAGS)
+
+# This was made so we could export all symbols from libcommon
+# on multiple platforms
+libslurm_o_SOURCES =
+libslurm_o_LDADD = $(top_builddir)/src/common/libeio.o \
+ $(top_builddir)/src/common/libspank.o \
+ $(top_builddir)/src/common/libcommon.o
+
CLEANFILES = \
$(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT)
@@ -479,6 +502,13 @@ libslurm.la: $(libslurm_la_OBJECTS) $(libslurm_la_DEPENDENCIES)
libslurmhelper.la: $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_DEPENDENCIES)
$(LINK) $(libslurmhelper_la_OBJECTS) $(libslurmhelper_la_LIBADD) $(LIBS)
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; for p in $$list; do \
+ f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f $$p $$f"; \
+ rm -f $$p $$f ; \
+ done
+
mostlyclean-compile:
-rm -f *.$(OBJEXT)
@@ -613,7 +643,7 @@ distdir: $(DISTFILES)
check-am: all-am
check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-am
-all-am: Makefile $(LTLIBRARIES)
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS)
installdirs:
for dir in "$(DESTDIR)$(libdir)"; do \
test -z "$$dir" || $(MKDIR_P) "$$dir"; \
@@ -649,7 +679,7 @@ maintainer-clean-generic:
clean: clean-am
clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \
- clean-noinstLTLIBRARIES mostlyclean-am
+ clean-noinstLTLIBRARIES clean-noinstPROGRAMS mostlyclean-am
distclean: distclean-am
-rm -rf ./$(DEPDIR)
@@ -709,21 +739,25 @@ uninstall-am: uninstall-libLTLIBRARIES
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libLTLIBRARIES clean-libtool clean-noinstLTLIBRARIES \
- ctags distclean distclean-compile distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-libLTLIBRARIES install-man install-pdf \
- install-pdf-am install-ps install-ps-am install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags uninstall uninstall-am uninstall-libLTLIBRARIES
-
+ clean-noinstPROGRAMS ctags distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html
install-html-am \ + install-info install-info-am install-libLTLIBRARIES \ + install-man install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-libLTLIBRARIES + + +libslurm.o: $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD) + $(LINK) $(libslurmhelper_la_OBJECTS) $(libslurm_o_LDADD) force: -$(convenience_libs) : force +$(libslurm_o_LDADD) $(convenience_libs) : force @cd `dirname $@` && $(MAKE) `basename $@` # FIXME - don't export the client_io_handler_ symbols once srun is no longer diff --git a/src/api/allocate.c b/src/api/allocate.c index 96769c7c0477dd4c9e649e950193b744d42cbb8a..0fd089694a68bae3a96b2362952e753cea84e375 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * allocate.c - allocate nodes for a job or step with supplied contraints - * $Id: allocate.c 14453 2008-07-08 20:26:18Z da $ + * $Id: allocate.c 14571 2008-07-18 22:25:56Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -251,6 +251,8 @@ slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, /* yes, allocation has been granted */ errno = SLURM_PROTOCOL_SUCCESS; } else if (!req->immediate) { + if(resp->error_code != SLURM_SUCCESS) + info("%s", slurm_strerror(resp->error_code)); /* no, we need to wait for a response */ job_id = resp->job_id; slurm_free_resource_allocation_response_msg(resp); @@ -631,6 +633,9 @@ _handle_msg(slurm_msg_t *msg, resource_allocation_response_msg_t **resp) *resp = msg->data; rc = 1; break; + case SRUN_JOB_COMPLETE: + info("Job has been cancelled"); + break; default: error("received spurious message type: %d\n", msg->msg_type); diff --git a/src/api/config_info.c b/src/api/config_info.c index db17884c6c42774433f094316ec80a200d6f8236..f4afe45788edaf54e7091c558770c8f29ec11f4b 100644 --- a/src/api/config_info.c +++ b/src/api/config_info.c @@ -61,6 +61,7 @@ extern long slurm_api_version (void) return (long) SLURM_API_VERSION; } + static char * _select_info(uint16_t select_type_param) { @@ -109,7 +110,7 @@ static char *_task_plugin_param(uint16_t task_plugin_param) void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_info_msg_t * slurm_ctl_conf_ptr ) { - char time_str[32]; + char time_str[32], tmp_str[128]; if ( slurm_ctl_conf_ptr == NULL ) return ; @@ -153,11 +154,15 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_ptr->control_machine); fprintf(out, "CryptoType = %s\n", slurm_ctl_conf_ptr->crypto_type); - if (slurm_ctl_conf_ptr->def_mem_per_task) { - fprintf(out, "DefMemPerTask = %u\n", + if (slurm_ctl_conf_ptr->def_mem_per_task & MEM_PER_CPU) { + fprintf(out, "DefMemPerCPU = %u\n", + slurm_ctl_conf_ptr->def_mem_per_task & + (~MEM_PER_CPU)); + } else if (slurm_ctl_conf_ptr->def_mem_per_task) { + fprintf(out, "DefMemPerNode = %u\n", slurm_ctl_conf_ptr->def_mem_per_task); } else - fprintf(out, "DefMemPerTask = UNLIMITED\n"); + fprintf(out, "DefMemPerCPU = UNLIMITED\n"); if (slurm_ctl_conf_ptr->disable_root_jobs) fprintf(out, "DisableRootJobs = YES\n"); else @@ -220,11 +225,15 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_ptr->mail_prog); fprintf(out, 
"MaxJobCount = %u\n", slurm_ctl_conf_ptr->max_job_cnt); - if (slurm_ctl_conf_ptr->max_mem_per_task) { - fprintf(out, "MaxMemPerTask = %u\n", + if (slurm_ctl_conf_ptr->max_mem_per_task & MEM_PER_CPU) { + fprintf(out, "MaxMemPerCPU = %u\n", + slurm_ctl_conf_ptr->max_mem_per_task & + (~MEM_PER_CPU)); + } else if (slurm_ctl_conf_ptr->max_mem_per_task) { + fprintf(out, "MaxMemPerNode = %u\n", slurm_ctl_conf_ptr->max_mem_per_task); } else - fprintf(out, "MaxMemPerTask = UNLIMITED\n"); + fprintf(out, "MaxMemPerCPU = UNLIMITED\n"); fprintf(out, "MessageTimeout = %u\n", slurm_ctl_conf_ptr->msg_timeout); fprintf(out, "MinJobAge = %u\n", @@ -240,8 +249,9 @@ void slurm_print_ctl_conf ( FILE* out, slurm_ctl_conf_ptr->plugindir); fprintf(out, "PlugStackConfig = %s\n", slurm_ctl_conf_ptr->plugstack); - fprintf(out, "PrivateData = %u\n", - slurm_ctl_conf_ptr->private_data); + private_data_string(slurm_ctl_conf_ptr->private_data, + tmp_str, sizeof(tmp_str)); + fprintf(out, "PrivateData = %s\n", tmp_str); fprintf(out, "ProctrackType = %s\n", slurm_ctl_conf_ptr->proctrack_type); fprintf(out, "Prolog = %s\n", diff --git a/src/api/init_msg.c b/src/api/init_msg.c index ec1e1f797fea38d789dae485e710e2a87ac061ad..333752b31d3f1bcb9320351057a08e97a16b22fa 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -1,8 +1,8 @@ /*****************************************************************************\ * init_msg.c - initialize RPC messages contents - * $Id: init_msg.c 13672 2008-03-19 23:10:58Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>. * LLNL-CODE-402394. 
@@ -55,71 +55,25 @@ */ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) { - job_desc_msg->account = NULL; - job_desc_msg->acctg_freq = (uint16_t) NO_VAL; - job_desc_msg->alloc_node = NULL; - job_desc_msg->alloc_sid = NO_VAL; - job_desc_msg->comment = NULL; - job_desc_msg->contiguous = (uint16_t) NO_VAL; - job_desc_msg->cpus_per_task = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; - job_desc_msg->dependency = NULL; - job_desc_msg->environment = ((char **) NULL); - job_desc_msg->env_size = 0; - job_desc_msg->features = NULL; - job_desc_msg->immediate = 0; - job_desc_msg->job_id = NO_VAL; - job_desc_msg->job_min_cores = (uint16_t) NO_VAL; - job_desc_msg->job_min_procs = (uint16_t) NO_VAL; - job_desc_msg->job_min_sockets = (uint16_t) NO_VAL; - job_desc_msg->job_min_threads = (uint16_t) NO_VAL; - job_desc_msg->job_min_memory = NO_VAL; - job_desc_msg->job_min_tmp_disk= NO_VAL; - job_desc_msg->kill_on_node_fail = (uint16_t) NO_VAL; - job_desc_msg->licenses = NULL; - job_desc_msg->name = NULL; - job_desc_msg->network = NULL; - job_desc_msg->nice = NICE_OFFSET; - job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; - job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; - job_desc_msg->num_tasks = NO_VAL; - job_desc_msg->open_mode = 0; /* system default */ - job_desc_msg->overcommit = (uint8_t) NO_VAL; - job_desc_msg->partition = NULL; - job_desc_msg->plane_size = (uint16_t) NO_VAL; - job_desc_msg->priority = NO_VAL; - job_desc_msg->req_nodes = NULL; - job_desc_msg->exc_nodes = NULL; - job_desc_msg->script = NULL; - job_desc_msg->argv = ((char **) NULL); - job_desc_msg->argc = 0; - job_desc_msg->shared = (uint16_t) NO_VAL; - job_desc_msg->task_dist = (uint16_t) NO_VAL; - job_desc_msg->time_limit = NO_VAL; - job_desc_msg->num_procs = NO_VAL; - job_desc_msg->max_nodes = NO_VAL; - job_desc_msg->min_nodes = NO_VAL; - job_desc_msg->max_sockets = (uint16_t) NO_VAL; - job_desc_msg->min_sockets = (uint16_t) NO_VAL; - job_desc_msg->max_cores = (uint16_t) NO_VAL; - job_desc_msg->min_cores = (uint16_t) NO_VAL; - job_desc_msg->max_threads = (uint16_t) NO_VAL; - job_desc_msg->min_threads = (uint16_t) NO_VAL; - job_desc_msg->err = NULL; - job_desc_msg->in = NULL; - job_desc_msg->out = NULL; - job_desc_msg->user_id = NO_VAL; - job_desc_msg->group_id = NO_VAL; - job_desc_msg->work_dir = NULL; - job_desc_msg->alloc_resp_port = 0; - job_desc_msg->other_port = 0; - job_desc_msg->mail_type = 0; - job_desc_msg->mail_user = NULL; - job_desc_msg->begin_time = 0; - job_desc_msg->requeue = (uint16_t) NO_VAL; + job_desc_msg->account = NULL; + job_desc_msg->acctg_freq = (uint16_t) NO_VAL; + job_desc_msg->alloc_node = NULL; + job_desc_msg->alloc_resp_port = 0; + job_desc_msg->alloc_sid = NO_VAL; + job_desc_msg->argc = 0; + job_desc_msg->argv = ((char **) NULL); + job_desc_msg->begin_time = 0; + job_desc_msg->blrtsimage = NULL; + job_desc_msg->comment = NULL; + job_desc_msg->conn_type = (uint16_t) NO_VAL; + job_desc_msg->contiguous = (uint16_t) NO_VAL; + job_desc_msg->cpus_per_task = (uint16_t) NO_VAL; + job_desc_msg->dependency = NULL; + job_desc_msg->environment = ((char **) NULL); + job_desc_msg->env_size = 0; + job_desc_msg->err = NULL; + job_desc_msg->exc_nodes = NULL; + job_desc_msg->features = NULL; #if SYSTEM_DIMENSIONS { int i; @@ -127,14 +81,58 @@ void slurm_init_job_desc_msg(job_desc_msg_t * job_desc_msg) 
job_desc_msg->geometry[i] = (uint16_t) NO_VAL; } #endif - job_desc_msg->conn_type = (uint16_t) NO_VAL; - job_desc_msg->reboot = (uint16_t) NO_VAL; - job_desc_msg->rotate = (uint16_t) NO_VAL; - job_desc_msg->blrtsimage = NULL; - job_desc_msg->linuximage = NULL; - job_desc_msg->mloaderimage = NULL; - job_desc_msg->ramdiskimage = NULL; - job_desc_msg->select_jobinfo = NULL; + job_desc_msg->group_id = NO_VAL; + job_desc_msg->immediate = 0; + job_desc_msg->in = NULL; + job_desc_msg->job_id = NO_VAL; + job_desc_msg->job_min_cores = (uint16_t) NO_VAL; + job_desc_msg->job_min_procs = (uint16_t) NO_VAL; + job_desc_msg->job_min_sockets = (uint16_t) NO_VAL; + job_desc_msg->job_min_threads = (uint16_t) NO_VAL; + job_desc_msg->job_min_memory = NO_VAL; + job_desc_msg->job_min_tmp_disk = NO_VAL; + job_desc_msg->kill_on_node_fail = (uint16_t) NO_VAL; + job_desc_msg->licenses = NULL; + job_desc_msg->linuximage = NULL; + job_desc_msg->mail_type = 0; + job_desc_msg->mail_user = NULL; + job_desc_msg->max_cores = (uint16_t) NO_VAL; + job_desc_msg->max_nodes = NO_VAL; + job_desc_msg->max_sockets = (uint16_t) NO_VAL; + job_desc_msg->max_threads = (uint16_t) NO_VAL; + job_desc_msg->min_cores = (uint16_t) NO_VAL; + job_desc_msg->min_nodes = NO_VAL; + job_desc_msg->min_sockets = (uint16_t) NO_VAL; + job_desc_msg->min_threads = (uint16_t) NO_VAL; + job_desc_msg->mloaderimage = NULL; + job_desc_msg->name = NULL; + job_desc_msg->network = NULL; + job_desc_msg->nice = NICE_OFFSET; + job_desc_msg->ntasks_per_core = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_node = (uint16_t) NO_VAL; + job_desc_msg->ntasks_per_socket = (uint16_t) NO_VAL; + job_desc_msg->num_procs = NO_VAL; + job_desc_msg->num_tasks = NO_VAL; + job_desc_msg->open_mode = 0; /* system default */ + job_desc_msg->other_port = 0; + job_desc_msg->out = NULL; + job_desc_msg->overcommit = (uint8_t) NO_VAL; + job_desc_msg->partition = NULL; + job_desc_msg->plane_size = (uint16_t) NO_VAL; + job_desc_msg->priority = NO_VAL; + job_desc_msg->ramdiskimage = NULL; + job_desc_msg->reboot = (uint16_t) NO_VAL; + job_desc_msg->resp_host = NULL; + job_desc_msg->req_nodes = NULL; + job_desc_msg->requeue = (uint16_t) NO_VAL; + job_desc_msg->rotate = (uint16_t) NO_VAL; + job_desc_msg->script = NULL; + job_desc_msg->select_jobinfo = NULL; + job_desc_msg->shared = (uint16_t) NO_VAL; + job_desc_msg->task_dist = (uint16_t) NO_VAL; + job_desc_msg->time_limit = NO_VAL; + job_desc_msg->user_id = NO_VAL; + job_desc_msg->work_dir = NULL; } /* diff --git a/src/api/job_info.c b/src/api/job_info.c index dc9c696ebf8f9e21614a3258984ef3a4105d7ab3..f44ed7db99757edbee9bce412f3c995e8a0feecb 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -1,8 +1,8 @@ /*****************************************************************************\ * job_info.c - get/print the job state information of slurm - * $Id: job_info.c 14298 2008-06-20 16:45:30Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> et. al. * LLNL-CODE-402394. 
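slurm_init_job_desc_msg() above resets every field of job_desc_msg_t to its "unset" value, so callers only fill in what they need before submitting or allocating. A hedged usage sketch combining the initializer with the new MEM_PER_CPU flag (the field values are hypothetical; only the initializer, the job_min_memory field, and the flag come from this patch):

#include <sys/types.h>
#include <unistd.h>
#include <slurm/slurm.h>

/* Ask for 2 tasks with 512 MB of real memory per CPU (not per node). */
static int alloc_with_mem_per_cpu(void)
{
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc = NULL;

	slurm_init_job_desc_msg(&desc);	/* every field starts out "unset" */
	desc.name           = "mem_per_cpu_demo";
	desc.num_tasks      = 2;
	desc.user_id        = getuid();
	desc.group_id       = getgid();
	/* The high-order bit selects the per-CPU meaning of job_min_memory. */
	desc.job_min_memory = 512 | MEM_PER_CPU;

	if (slurm_allocate_resources(&desc, &alloc) < 0)
		return -1;
	slurm_free_resource_allocation_response_msg(alloc);
	return 0;
}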
@@ -371,13 +371,18 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) xstrcat(out, "\n "); /****** Line 10 ******/ + if (job_ptr->job_min_memory & MEM_PER_CPU) { + job_ptr->job_min_memory &= (~MEM_PER_CPU); + tmp3_ptr = "CPU"; + } else + tmp3_ptr = "Node"; convert_num_unit((float)job_ptr->job_min_memory, tmp1, sizeof(tmp1), UNIT_NONE); convert_num_unit((float)job_ptr->job_min_tmp_disk, tmp2, sizeof(tmp2), UNIT_NONE); snprintf(tmp_line, sizeof(tmp_line), - "MinMemory=%s MinTmpDisk=%s Features=%s", - tmp1, tmp2, job_ptr->features); + "MinMemory%s=%s MinTmpDisk=%s Features=%s", + tmp3_ptr, tmp1, tmp2, job_ptr->features); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); @@ -551,8 +556,9 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) return out; } + /* - * slurm_load_jobs - issue RPC to get slurm all job configuration + * slurm_load_jobs - issue RPC to get all job configuration * information if changed since update_time * IN update_time - time of current configuration data * IN job_info_msg_pptr - place to store a job configuration pointer @@ -598,6 +604,48 @@ slurm_load_jobs (time_t update_time, job_info_msg_t **resp, return SLURM_PROTOCOL_SUCCESS ; } +/* + * slurm_load_job - issue RPC to get job information for one job ID + * IN job_info_msg_pptr - place to store a job configuration pointer + * IN job_id - ID of job we want information about + * RET 0 or -1 on error + * NOTE: free the response using slurm_free_job_info_msg + */ +extern int +slurm_load_job (job_info_msg_t **resp, uint32_t job_id) +{ + int rc; + slurm_msg_t resp_msg; + slurm_msg_t req_msg; + job_id_msg_t req; + + slurm_msg_t_init(&req_msg); + slurm_msg_t_init(&resp_msg); + + req.job_id = job_id; + req_msg.msg_type = REQUEST_JOB_INFO_SINGLE; + req_msg.data = &req; + + if (slurm_send_recv_controller_msg(&req_msg, &resp_msg) < 0) + return SLURM_ERROR; + + switch (resp_msg.msg_type) { + case RESPONSE_JOB_INFO: + *resp = (job_info_msg_t *)resp_msg.data; + break; + case RESPONSE_SLURM_RC: + rc = ((return_code_msg_t *) resp_msg.data)->return_code; + slurm_free_return_code_msg(resp_msg.data); + if (rc) + slurm_seterrno_ret(rc); + break; + default: + slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR); + break; + } + + return SLURM_PROTOCOL_SUCCESS ; +} /* * slurm_pid2jobid - issue RPC to get the slurm job_id given a process_id diff --git a/src/api/step_ctx.c b/src/api/step_ctx.c index 3b3bd1d1cdea26de18b97a8c7f6c4b64295cb851..9cc2ddc8b53e541f6bf929df08fbf7e347748f70 100644 --- a/src/api/step_ctx.c +++ b/src/api/step_ctx.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * step_ctx.c - step_ctx task functions for use by AIX/POE * - * $Id: step_ctx.c 13672 2008-03-19 23:10:58Z jette $ + * $Id: step_ctx.c 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
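The slurm_load_job() RPC implemented above retrieves a single job record instead of the whole job table. A short usage sketch (error handling abbreviated; job_state_string() and slurm_free_job_info_msg() are existing libslurm calls):

#include <stdio.h>
#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

/* Report the state of one job using the new single-job RPC. */
static int show_job_state(uint32_t job_id)
{
	job_info_msg_t *msg = NULL;
	job_info_t *job;

	if (slurm_load_job(&msg, job_id) < 0) {
		slurm_perror("slurm_load_job");
		return -1;
	}
	if (msg->record_count > 0) {
		job = &msg->job_array[0];
		printf("job %u: %s\n", job->job_id,
		       job_state_string(job->job_state));
	}
	slurm_free_job_info_msg(msg);	/* required, per the NOTE above */
	return 0;
}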
@@ -58,12 +58,14 @@ static void _job_fake_cred(struct slurm_step_ctx_struct *ctx)
{
slurm_cred_arg_t arg;
- arg.jobid = ctx->job_id;
- arg.stepid = ctx->step_resp->job_step_id;
- arg.uid = ctx->user_id;
- arg.hostlist = ctx->step_req->node_list;
- arg.alloc_lps_cnt = 0;
- arg.alloc_lps = NULL;
+ arg.alloc_lps_cnt = 0;
+ arg.alloc_lps = NULL;
+ arg.hostlist = ctx->step_req->node_list;
+ arg.job_mem = 0;
+ arg.jobid = ctx->job_id;
+ arg.stepid = ctx->step_resp->job_step_id;
+ arg.task_mem = 0;
+ arg.uid = ctx->user_id;
ctx->step_resp->cred = slurm_cred_faker(&arg);
}
diff --git a/src/api/step_launch.c b/src/api/step_launch.c
index 61c100f4bee66090112b431f4fa201db3e00c40b..981fb2da6513528c52008b16702b55960eb20f3b 100644
--- a/src/api/step_launch.c
+++ b/src/api/step_launch.c
@@ -410,7 +410,8 @@ void slurm_step_launch_wait_finish(slurm_step_ctx_t *ctx)
}
if (!force_terminated_job && task_exit_signal)
- info("Force Terminated job step");
+ info("Force Terminated job step %u.%u",
+ ctx->job_id, ctx->step_resp->job_step_id);
/* Then shutdown the message handler thread */
eio_signal_shutdown(sls->msg_handle);
diff --git a/src/common/Makefile.am b/src/common/Makefile.am
index b1a86ac22766ca094ee4deae1c5ded7f3f47300e..f6127ebe20e3e9b59465d712d50760f0923ab420 100644
--- a/src/common/Makefile.am
+++ b/src/common/Makefile.am
@@ -20,7 +20,7 @@ endif
INCLUDES = -I$(top_srcdir)
-noinst_PROGRAMS = libcommon.o
+noinst_PROGRAMS = libcommon.o libeio.o libspank.o
noinst_LTLIBRARIES = \
libcommon.la \
@@ -118,14 +118,24 @@ libcommon_la_LIBADD = -ldl
libcommon_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic
-libcommon_o_SOURCES =
-
-
# This was made so we chould export all symbols from libcommon
# on multiple platforms
+libcommon_o_SOURCES =
libcommon.o : $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES)
$(libcommon_la_LINK) $(libcommon_la_OBJECTS)
+# This was made so we could export all symbols from libeio
+# on multiple platforms
+libeio_o_SOURCES =
+libeio.o : $(libeio_la_OBJECTS) $(libeio_la_DEPENDENCIES)
+ $(LINK) $(libeio_la_OBJECTS)
+
+# This was made so we could export all symbols from libspank
+# on multiple platforms
+libspank_o_SOURCES =
+libspank.o : $(libspank_la_OBJECTS) $(libspank_la_DEPENDENCIES)
+ $(LINK) $(libspank_la_OBJECTS)
+
global_defaults.c : $(top_builddir)/config.h Makefile
@( echo "/* This file autogenerated by src/common/Makefile */"; \
diff --git a/src/common/Makefile.in b/src/common/Makefile.in
index 10ada442258550ce65a5988c40b8283a17c154bb..e8dddce1ab12fbeadf705d6dbf4bbb61cc2378fe 100644
--- a/src/common/Makefile.in
+++ b/src/common/Makefile.in
@@ -44,7 +44,8 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-noinst_PROGRAMS = libcommon.o$(EXEEXT)
+noinst_PROGRAMS = libcommon.o$(EXEEXT) libeio.o$(EXEEXT) \
+ libspank.o$(EXEEXT)
subdir = src/common
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -142,6 +143,12 @@ PROGRAMS = $(noinst_PROGRAMS)
am_libcommon_o_OBJECTS =
libcommon_o_OBJECTS = $(am_libcommon_o_OBJECTS)
libcommon_o_LDADD = $(LDADD)
+am_libeio_o_OBJECTS =
+libeio_o_OBJECTS = $(am_libeio_o_OBJECTS)
+libeio_o_LDADD = $(LDADD)
+am_libspank_o_OBJECTS =
+libspank_o_OBJECTS = $(am_libspank_o_OBJECTS)
+libspank_o_LDADD = $(LDADD)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm
depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp
am__depfiles_maybe = depfiles
@@ -156,11 +163,13 @@ LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libcommon_la_SOURCES) $(EXTRA_libcommon_la_SOURCES) \
$(libdaemonize_la_SOURCES) $(libeio_la_SOURCES) \
- $(libspank_la_SOURCES) $(libcommon_o_SOURCES)
+ $(libspank_la_SOURCES) $(libcommon_o_SOURCES) \
+ $(libeio_o_SOURCES) $(libspank_o_SOURCES)
DIST_SOURCES = $(am__libcommon_la_SOURCES_DIST) \
$(am__EXTRA_libcommon_la_SOURCES_DIST) \
$(libdaemonize_la_SOURCES) $(libeio_la_SOURCES) \
- $(libspank_la_SOURCES) $(libcommon_o_SOURCES)
+ $(libspank_la_SOURCES) $(libcommon_o_SOURCES) \
+ $(libeio_o_SOURCES) $(libspank_o_SOURCES)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
@@ -431,7 +440,18 @@ libspank_la_SOURCES = \
libcommon_la_LIBADD = -ldl
libcommon_la_LDFLAGS = $(LIB_LDFLAGS) -module --export-dynamic
+
+# This was made so we could export all symbols from libcommon
+# on multiple platforms
libcommon_o_SOURCES =
+
+# This was made so we could export all symbols from libeio
+# on multiple platforms
+libeio_o_SOURCES =
+
+# This was made so we could export all symbols from libspank
+# on multiple platforms
+libspank_o_SOURCES =
all: all-am
.SUFFIXES:
@@ -762,11 +782,12 @@ uninstall-am:
mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
tags uninstall uninstall-am
-
-# This was made so we chould export all symbols from libcommon
-# on multiple platforms
libcommon.o : $(libcommon_la_OBJECTS) $(libcommon_la_DEPENDENCIES)
$(libcommon_la_LINK) $(libcommon_la_OBJECTS)
+libeio.o : $(libeio_la_OBJECTS) $(libeio_la_DEPENDENCIES)
+ $(LINK) $(libeio_la_OBJECTS)
+libspank.o : $(libspank_la_OBJECTS) $(libspank_la_DEPENDENCIES)
+ $(LINK) $(libspank_la_OBJECTS)
global_defaults.c : $(top_builddir)/config.h Makefile
@( echo "/* This file autogenerated by src/common/Makefile */"; \
diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c
index 21a5b2d2a701edb722274cc7afdde38b27faae76..39a034312812d59b80d56a6c3237fdea2eb26ffc 100644
--- a/src/common/assoc_mgr.c
+++ b/src/common/assoc_mgr.c
@@ -44,12 +44,14 @@
#include "src/slurmdbd/read_config.h"
static List local_association_list = NULL;
+static List local_qos_list = NULL;
static List local_user_list = NULL;
static char *local_cluster_name = NULL;
void (*remove_assoc_notify) (acct_association_rec_t *rec) = NULL;
static pthread_mutex_t local_association_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_mutex_t local_qos_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t local_user_lock = PTHREAD_MUTEX_INITIALIZER;
/* locks should be put in place before calling this function */
@@ -144,6 +146,28 @@ static int _get_local_association_list(void *db_conn, int enforce)
return SLURM_SUCCESS;
}
+static int _get_local_qos_list(void *db_conn, int enforce)
+{
+ slurm_mutex_lock(&local_qos_lock);
+ if(local_qos_list)
+ list_destroy(local_qos_list);
+ local_qos_list = acct_storage_g_get_qos(db_conn, NULL);
+
+ if(!local_qos_list) {
+ slurm_mutex_unlock(&local_qos_lock);
+ if(enforce) {
+ error("_get_local_qos_list: "
+ "no list was made.");
+ return SLURM_ERROR;
+ } else {
+ return SLURM_SUCCESS;
+ }
+ }
+
+ slurm_mutex_unlock(&local_qos_lock);
+ return SLURM_SUCCESS;
+}
+
static int _get_local_user_list(void *db_conn, int enforce)
{
acct_user_cond_t user_q;
@@ -204,6 +228,10 @@ extern int assoc_mgr_init(void *db_conn, assoc_init_args_t *args)
if(_get_local_association_list(db_conn, enforce) == SLURM_ERROR)
return SLURM_ERROR;
+ if(!local_qos_list)
+ if(_get_local_qos_list(db_conn, enforce) == SLURM_ERROR)
+ return SLURM_ERROR;
+
if(!local_user_list)
if(_get_local_user_list(db_conn,
enforce) == SLURM_ERROR) return SLURM_ERROR; @@ -215,10 +243,13 @@ extern int assoc_mgr_fini(void) { if(local_association_list) list_destroy(local_association_list); + if(local_qos_list) + list_destroy(local_qos_list); if(local_user_list) list_destroy(local_user_list); xfree(local_cluster_name); local_association_list = NULL; + local_qos_list = NULL; local_user_list = NULL; return SLURM_SUCCESS; @@ -293,14 +324,14 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc, "nonuser association"); continue; } else if(assoc->uid != found_assoc->uid) { - debug3("not the right user %u != %u", + debug4("not the right user %u != %u", assoc->uid, found_assoc->uid); continue; } if(found_assoc->acct && strcasecmp(assoc->acct, found_assoc->acct)) { - debug3("not the right account %s != %s", + debug4("not the right account %s != %s", assoc->acct, found_assoc->acct); continue; } @@ -309,7 +340,7 @@ extern int assoc_mgr_fill_in_assoc(void *db_conn, acct_association_rec_t *assoc, if(!local_cluster_name && found_assoc->cluster && strcasecmp(assoc->cluster, found_assoc->cluster)) { - debug3("not the right cluster"); + debug4("not the right cluster"); continue; } @@ -491,11 +522,11 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) continue; } else { if(!object->user && rec->user) { - debug3("we are looking for a " + debug4("we are looking for a " "nonuser association"); continue; } else if(object->uid != rec->uid) { - debug3("not the right user"); + debug4("not the right user"); continue; } @@ -503,7 +534,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) && (!rec->acct || strcasecmp(object->acct, rec->acct))) { - debug3("not the right account"); + debug4("not the right account"); continue; } @@ -512,7 +543,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) && (!rec->cluster || strcasecmp(object->cluster, rec->cluster))) { - debug3("not the right cluster"); + debug4("not the right cluster"); continue; } @@ -520,7 +551,7 @@ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update) && (!rec->partition || strcasecmp(object->partition, rec->partition))) { - debug3("not the right partition"); + debug4("not the right partition"); continue; } break; @@ -655,9 +686,13 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) object->default_acct = NULL; } - if(object->qos != ACCT_QOS_NOTSET) - rec->qos = object->qos; - + if(object->qos_list) { + if(rec->qos_list) + list_destroy(rec->qos_list); + rec->qos_list = object->qos_list; + object->qos_list = NULL; + } + if(object->admin_level != ACCT_ADMIN_NOTSET) rec->admin_level = rec->admin_level; @@ -674,6 +709,7 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) object->uid = NO_VAL; list_append(local_user_list, object); + break; case ACCT_REMOVE_USER: if(!rec) { //rc = SLURM_ERROR; @@ -712,6 +748,55 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) return rc; } +extern int assoc_mgr_update_local_qos(acct_update_object_t *update) +{ + acct_qos_rec_t * rec = NULL; + acct_qos_rec_t * object = NULL; + + ListIterator itr = NULL; + int rc = SLURM_SUCCESS; + + if(!local_qos_list) + return SLURM_SUCCESS; + + slurm_mutex_lock(&local_qos_lock); + itr = list_iterator_create(local_qos_list); + while((object = list_pop(update->objects))) { + list_iterator_reset(itr); + while((rec = list_next(itr))) { + if(object->id == rec->id) { + break; + } + } + //info("%d qos %s", update->type, object->name); + 
switch(update->type) { + case ACCT_ADD_QOS: + if(rec) { + //rc = SLURM_ERROR; + break; + } + list_append(local_qos_list, object); + break; + case ACCT_REMOVE_QOS: + if(!rec) { + //rc = SLURM_ERROR; + break; + } + list_delete_item(itr); + break; + default: + break; + } + if(update->type != ACCT_ADD_QOS) { + destroy_acct_qos_rec(object); + } + } + list_iterator_destroy(itr); + slurm_mutex_unlock(&local_qos_lock); + + return rc; +} + extern int assoc_mgr_validate_assoc_id(void *db_conn, uint32_t assoc_id, int enforce) diff --git a/src/common/assoc_mgr.h b/src/common/assoc_mgr.h index 512b753d424ee4910c61d2f9bd668b5d1dd833b3..df9b26f9b7664ba728f69faec5ef4776251df8b1 100644 --- a/src/common/assoc_mgr.h +++ b/src/common/assoc_mgr.h @@ -107,6 +107,13 @@ extern int assoc_mgr_fini(void); */ extern int assoc_mgr_update_local_assocs(acct_update_object_t *update); +/* + * update qos in local cache + * IN: acct_update_object_t *object + * RET: SLURM_SUCCESS on success (or not found) SLURM_ERROR else + */ +extern int assoc_mgr_update_local_qos(acct_update_object_t *update); + /* * update users in local cache * IN: acct_update_object_t *object diff --git a/src/common/jobacct_common.c b/src/common/jobacct_common.c index 847e171f547d230d0b6f032d36865a5b39442355..0529d9db4df4f94fa25475ff67a7cf78e08658fe 100644 --- a/src/common/jobacct_common.c +++ b/src/common/jobacct_common.c @@ -197,8 +197,6 @@ extern void destroy_jobacct_selected_step(void *object) { jobacct_selected_step_t *step = (jobacct_selected_step_t *)object; if (step) { - xfree(step->job); - xfree(step->step); xfree(step); } } @@ -380,8 +378,6 @@ unpack_error: extern void pack_jobacct_selected_step(jobacct_selected_step_t *step, Buf buffer) { - packstr(step->job, buffer); - packstr(step->step, buffer); pack32(step->jobid, buffer); pack32(step->stepid, buffer); } @@ -389,14 +385,11 @@ extern void pack_jobacct_selected_step(jobacct_selected_step_t *step, extern int unpack_jobacct_selected_step(jobacct_selected_step_t **step, Buf buffer) { - uint32_t uint32_tmp; jobacct_selected_step_t *step_ptr = xmalloc(sizeof(jobacct_selected_step_t)); *step = step_ptr; - safe_unpackstr_xmalloc(&step_ptr->job, &uint32_tmp, buffer); - safe_unpackstr_xmalloc(&step_ptr->step, &uint32_tmp, buffer); safe_unpack32(&step_ptr->jobid, buffer); safe_unpack32(&step_ptr->stepid, buffer); @@ -830,6 +823,8 @@ extern struct jobacctinfo *jobacct_common_stat_task(pid_t pid) break; } list_iterator_destroy(itr); + if (jobacct == NULL) + goto error; ret_jobacct = xmalloc(sizeof(struct jobacctinfo)); memcpy(ret_jobacct, jobacct, sizeof(struct jobacctinfo)); error: diff --git a/src/common/jobacct_common.h b/src/common/jobacct_common.h index bb373fd9de327dcdc73ad70755e51299efd7dc17..af391dd2339b8f34a16b9f59b3b481048992b2f2 100644 --- a/src/common/jobacct_common.h +++ b/src/common/jobacct_common.h @@ -64,6 +64,7 @@ #include <ctype.h> #define BUFFER_SIZE 4096 +#define FDUMP_FLAG 0x04 typedef struct { uint16_t taskid; /* contains which task number it was on */ @@ -86,30 +87,34 @@ typedef struct { } sacct_t; typedef struct { - char *opt_cluster; /* --cluster */ + List opt_acct_list; /* --accounts list of char * */ + int opt_begin; /* --begin */ + List opt_cluster_list; /* --cluster list of char * */ int opt_completion; /* --completion */ int opt_dump; /* --dump */ int opt_dup; /* --duplicates; +1 = explicitly set */ + int opt_end; /* --end */ + long opt_expire; /* --expire= */ + char *opt_expire_timespec; /* --expire= */ int opt_fdump; /* --formattted_dump */ - int opt_stat; /* --stat */ - 
int opt_gid; /* --gid (-1=wildcard, 0=root) */ - int opt_header; /* can only be cleared */ + char *opt_field_list; /* --fields= */ + char *opt_filein; /* --file */ + int opt_gid; /* running persons gid */ + List opt_gid_list; /* --gid list of char * */ int opt_help; /* --help */ + List opt_job_list; /* --jobs */ int opt_long; /* --long */ int opt_lowmem; /* --low_memory */ - int opt_raw; /* --raw */ + int opt_noheader; /* can only be cleared */ + List opt_partition_list;/* --partitions */ int opt_purge; /* --purge */ + int opt_raw; /* --raw */ + int opt_stat; /* --stat */ + List opt_state_list; /* --states */ int opt_total; /* --total */ - int opt_uid; /* --uid (-1=wildcard, 0=root) */ - int opt_uid_set; + int opt_uid; /* running persons uid */ + List opt_uid_list; /* --uid list of char * */ int opt_verbose; /* --verbose */ - long opt_expire; /* --expire= */ - char *opt_expire_timespec; /* --expire= */ - char *opt_field_list; /* --fields= */ - char *opt_filein; /* --file */ - char *opt_job_list; /* --jobs */ - char *opt_partition_list;/* --partitions */ - char *opt_state_list; /* --states */ } sacct_parameters_t; typedef struct { @@ -179,8 +184,6 @@ typedef struct { } jobacct_step_rec_t; typedef struct selected_step_t { - char *job; - char *step; uint32_t jobid; uint32_t stepid; } jobacct_selected_step_t; diff --git a/src/common/pack.c b/src/common/pack.c index d48f3c0413b64db56c42c52a97e35b77262e1ad4..07863aa8cc16856e8387f41db3ec0ce9175c987d 100644 --- a/src/common/pack.c +++ b/src/common/pack.c @@ -2,7 +2,8 @@ * pack.c - lowest level un/pack functions * NOTE: The memory buffer will expand as needed using xrealloc() ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Jim Garlick <garlick@llnl.gov>, * Morris Jette <jette1@llnl.gov>, et. al. 
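The guards added below protect the low-level Buf routines against 32-bit size overflow. For reference, a minimal pack/unpack round trip with this API (a sketch; assumes the init_buf, pack32, unpack32, set_buf_offset, and free_buf helpers declared in src/common/pack.h):

#include "src/common/pack.h"

/* Pack one value, rewind, and unpack it again. */
static int buf_round_trip(void)
{
	uint32_t in = 42, out = 0;
	Buf buf = init_buf(BUF_SIZE);	/* now rejects sizes above MAX_BUF_SIZE */

	if (buf == NULL)
		return -1;
	pack32(in, buf);		/* grows the buffer as needed */
	set_buf_offset(buf, 0);		/* rewind before unpacking */
	if ((unpack32(&out, buf) < 0) || (out != in)) {
		free_buf(buf);
		return -1;
	}
	free_buf(buf);
	return 0;
}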
@@ -93,6 +94,11 @@ Buf create_buf(char *data, int size) { Buf my_buf; + if (size > MAX_BUF_SIZE) { + error("create_buf: buffer size too large"); + return NULL; + } + my_buf = xmalloc(sizeof(struct slurm_buf)); my_buf->magic = BUF_MAGIC; my_buf->size = size; @@ -114,6 +120,11 @@ void free_buf(Buf my_buf) /* Grow a buffer by the specified amount */ void grow_buf (Buf buffer, int size) { + if (buffer->size > (MAX_BUF_SIZE - size)) { + error("grow_buf: buffer size too large"); + return; + } + buffer->size += size; xrealloc(buffer->head, buffer->size); } @@ -123,6 +134,11 @@ Buf init_buf(int size) { Buf my_buf; + if (size > MAX_BUF_SIZE) { + error("init_buf: buffer size too large"); + return NULL; + } + my_buf = xmalloc(sizeof(struct slurm_buf)); my_buf->magic = BUF_MAGIC; my_buf->size = size; @@ -153,6 +169,10 @@ void pack_time(time_t val, Buf buffer) int64_t n64 = HTON_int64((int64_t) val); if (remaining_buf(buffer) < sizeof(n64)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack_time: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -184,6 +204,10 @@ void pack64(uint64_t val, Buf buffer) uint64_t nl = HTON_uint64(val); if (remaining_buf(buffer) < sizeof(nl)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack64: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -217,6 +241,10 @@ void pack32(uint32_t val, Buf buffer) uint32_t nl = htonl(val); if (remaining_buf(buffer) < sizeof(nl)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack32: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -308,6 +336,10 @@ void pack16(uint16_t val, Buf buffer) uint16_t ns = htons(val); if (remaining_buf(buffer) < sizeof(ns)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack16: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -340,6 +372,10 @@ int unpack16(uint16_t * valp, Buf buffer) void pack8(uint8_t val, Buf buffer) { if (remaining_buf(buffer) < sizeof(uint8_t)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("pack8: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -372,6 +408,10 @@ void packmem(char *valp, uint32_t size_val, Buf buffer) uint32_t ns = htonl(size_val); if (remaining_buf(buffer) < (sizeof(ns) + size_val)) { + if (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE)) { + error("packmem: buffer size too large"); + return; + } buffer->size += (size_val + BUF_SIZE); xrealloc(buffer->head, buffer->size); } @@ -520,6 +560,10 @@ void packstr_array(char **valp, uint32_t size_val, Buf buffer) uint32_t ns = htonl(size_val); if (remaining_buf(buffer) < sizeof(ns)) { + if (buffer->size > (MAX_BUF_SIZE - BUF_SIZE)) { + error("packstr_array: buffer size too large"); + return; + } buffer->size += BUF_SIZE; xrealloc(buffer->head, buffer->size); } @@ -574,6 +618,10 @@ int unpackstr_array(char ***valp, uint32_t * size_valp, Buf buffer) void packmem_array(char *valp, uint32_t size_val, Buf buffer) { if (remaining_buf(buffer) < size_val) { + if (buffer->size > (MAX_BUF_SIZE - size_val - BUF_SIZE)) { + error("packmem_array: buffer size too large"); + return; + } buffer->size += (size_val + BUF_SIZE); xrealloc(buffer->head, buffer->size); } diff --git a/src/common/pack.h b/src/common/pack.h index 
3e537b6e07519b8e896220c783889986bf303bd9..40dad8a808d93c22b960259b3a7d0a4af6b60fac 100644 --- a/src/common/pack.h +++ b/src/common/pack.h @@ -2,7 +2,8 @@ * pack.h - definitions for lowest level un/pack functions. all functions * utilize a Buf structure. Call init_buf, un/pack, and free_buf ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Kevin Tew <tew1@llnl.gov>, Morris Jette <jette1@llnl.gov>, et. al. * LLNL-CODE-402394. @@ -58,6 +59,7 @@ #define BUF_MAGIC 0x42554545 #define BUF_SIZE (16 * 1024) +#define MAX_BUF_SIZE ((uint32_t) 0xffff0000) /* avoid going over 32-bits */ struct slurm_buf { uint32_t magic; diff --git a/src/common/plugin.c b/src/common/plugin.c index 143e82ee0c2d12e1292a29b231497ec4f9d4fa46..cab64e29b4029ee2d363a72e1186aa8e0d3fd426 100644 --- a/src/common/plugin.c +++ b/src/common/plugin.c @@ -301,7 +301,7 @@ plugin_get_syms( plugin_handle_t plug, if ( ptrs[ i ] ) ++count; else - debug4("Couldn't find sym '%s' in the plugin", + debug3("Couldn't find sym '%s' in the plugin", names[ i ]); } diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 09e9899df2f7bffc85b29fc34d65b3f046d08b05..831c5344a5466c74c90eac02e1c8bebc55ae71e5 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -63,6 +63,7 @@ struct spank_plugin_operations { spank_f *init; + spank_f *init_post_opt; spank_f *local_user_init; spank_f *user_init; spank_f *user_task_init; @@ -71,9 +72,10 @@ struct spank_plugin_operations { spank_f *exit; }; -const int n_spank_syms = 7; +const int n_spank_syms = 8; const char *spank_syms[] = { "slurm_spank_init", + "slurm_spank_init_post_opt", "slurm_spank_local_user_init", "slurm_spank_user_init", "slurm_spank_task_init", @@ -131,6 +133,7 @@ typedef enum spank_handle_type { */ typedef enum step_fn { SPANK_INIT = 0, + SPANK_INIT_POST_OPT, LOCAL_USER_INIT, STEP_USER_INIT, STEP_USER_TASK_INIT, @@ -449,6 +452,8 @@ static const char *_step_fn_name(step_fn_t type) switch (type) { case SPANK_INIT: return ("init"); + case SPANK_INIT_POST_OPT: + return ("init_post_opt"); case LOCAL_USER_INIT: return ("local_user_init"); case STEP_USER_INIT: @@ -498,6 +503,14 @@ static int _do_call_stack(step_fn_t type, void * job, int taskid) fn_name, rc); } break; + case SPANK_INIT_POST_OPT: + if (sp->ops.init_post_opt) { + rc = (*sp->ops.init_post_opt) (spank, sp->ac, + sp->argv); + debug2("spank: %s: %s = %d\n", name, + fn_name, rc); + } + break; case LOCAL_USER_INIT: if (sp->ops.local_user_init) { rc = (*sp->ops.local_user_init) (spank, sp->ac, @@ -586,6 +599,9 @@ int spank_init(slurmd_job_t * job) return (-1); } + if (_do_call_stack(SPANK_INIT_POST_OPT, job, -1) < 0) + return (-1); + return (0); } @@ -1110,7 +1126,7 @@ global_to_local_id (slurmd_job_t *job, uint32_t gid, uint32_t *p2uint32) /* * Return 1 if spank_item_t is valid for S_TYPE_LOCAL */ -static int valid_in_local_context (spank_item_t item) +static int _valid_in_local_context (spank_item_t item) { int rc = 0; switch (item) { @@ -1130,6 +1146,24 @@ static int valid_in_local_context (spank_item_t item) return (rc); } +/* + * Return 1 if spank_item_t is just getting version (valid anywhere) + */ +static int _version_check (spank_item_t item) +{ + int rc = 0; + switch (item) { + case S_SLURM_VERSION: + 
case S_SLURM_VERSION_MAJOR: + case S_SLURM_VERSION_MINOR: + case S_SLURM_VERSION_MICRO: + rc = 1; + break; + default: + rc = 0; + } + return (rc); +} /* * Global functions for SPANK plugins @@ -1172,6 +1206,7 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) pid_t *p2pid; pid_t pid; char ***p2argv; + char **p2vers; slurmd_task_info_t *task; slurmd_job_t *slurmd_job = NULL; struct spank_launcher_job_info *launcher_job = NULL; @@ -1181,17 +1216,20 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) if ((spank == NULL) || (spank->magic != SPANK_MAGIC)) return (ESPANK_BAD_ARG); - if ( (spank->type != S_TYPE_REMOTE) - && (!valid_in_local_context(item))) - return (ESPANK_NOT_REMOTE); + if (!_version_check(item)) { + /* Need job pointer to process other items */ + if ( (spank->type != S_TYPE_REMOTE) + && (!_valid_in_local_context(item))) + return (ESPANK_NOT_REMOTE); - if (spank->job == NULL) - return (ESPANK_BAD_ARG); + if (spank->job == NULL) + return (ESPANK_BAD_ARG); - if (spank->type == S_TYPE_LOCAL) - launcher_job = spank->job; - else - slurmd_job = spank->job; + if (spank->type == S_TYPE_LOCAL) + launcher_job = spank->job; + else + slurmd_job = spank->job; + } va_start(vargs, item); switch (item) { @@ -1255,6 +1293,10 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) p2uint16 = va_arg(vargs, uint16_t *); *p2uint16 = slurmd_job->cpus; break; + case S_STEP_CPUS_PER_TASK: + p2uint32 = va_arg(vargs, uint32_t *); + *p2uint32 = slurmd_job->cpus_per_task; + break; case S_JOB_ARGV: p2int = va_arg(vargs, int *); p2argv = va_arg(vargs, char ***); @@ -1343,6 +1385,22 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...) p2uint32 = va_arg(vargs, uint32_t *); rc = global_to_local_id (slurmd_job, uint32, p2uint32); break; + case S_SLURM_VERSION: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_VERSION; + break; + case S_SLURM_VERSION_MAJOR: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_MAJOR; + break; + case S_SLURM_VERSION_MINOR: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_MINOR; + break; + case S_SLURM_VERSION_MICRO: + p2vers = va_arg(vargs, char **); + *p2vers = SLURM_MICRO; + break; default: rc = ESPANK_BAD_ARG; break; diff --git a/src/common/print_fields.c b/src/common/print_fields.c index 398a8597f29c909389bfc91ded724f1babe5826c..cb5d3ed858ee86d58160686406ba45c4dec099d8 100644 --- a/src/common/print_fields.c +++ b/src/common/print_fields.c @@ -41,6 +41,18 @@ int print_fields_parsable_print = 0; int print_fields_have_header = 1; +static int _sort_char_list(char *name_a, char *name_b) +{ + int diff = strcmp(name_a, name_b); + + if (diff < 0) + return -1; + else if (diff > 0) + return 1; + + return 0; +} + extern void destroy_print_field(void *object) { print_field_t *field = (print_field_t *)object; @@ -54,242 +66,151 @@ extern void destroy_print_field(void *object) extern void print_fields_header(List print_fields_list) { ListIterator itr = NULL; - print_field_t *object = NULL; + print_field_t *field = NULL; if(!print_fields_list || !print_fields_have_header) return; itr = list_iterator_create(print_fields_list); - while((object = list_next(itr))) { - (object->print_routine)(SLURM_PRINT_HEADLINE, object, 0); + while((field = list_next(itr))) { + if(print_fields_parsable_print) + printf("%s|", field->name); + else + printf("%-*.*s ", field->len, field->len, field->name); } list_iterator_reset(itr); printf("\n"); if(print_fields_parsable_print) return; - while((object = list_next(itr))) { - 
(object->print_routine)(SLURM_PRINT_UNDERSCORE, object, 0); + while((field = list_next(itr))) { + printf("%-*.*s ", field->len, field->len, + "-----------------------------------------------------"); } list_iterator_destroy(itr); printf("\n"); } -extern void print_fields_date(void) +extern void print_fields_date(print_field_t *field, time_t value) { - time_t now; - - now = time(NULL); - printf("%s", ctime(&now)); + char temp_char[field->len]; + time_t now = value; + if(!now) + now = time(NULL); + slurm_make_time_str(&value, (char *)temp_char, field->len); + if(print_fields_parsable_print) + printf("%s|", temp_char); + else + printf("%-*.*s ", field->len, field->len, temp_char); } -extern void print_fields_str(type_t type, print_field_t *field, char *value) +extern void print_fields_str(print_field_t *field, char *value) { - char *print_this = value; + char temp_char[field->len]; + char *print_this = NULL; - switch(type) { - case SLURM_PRINT_HEADLINE: + if(!value) { if(print_fields_parsable_print) - printf("%s|", field->name); + print_this = ""; else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: + print_this = " "; + } + + if(print_fields_parsable_print) + printf("%s|", value); + else { if(!print_this) { - if(print_fields_parsable_print) - print_this = ""; - else - print_this = " "; - } - - if(print_fields_parsable_print) - printf("%s|", print_this); - else { - if(strlen(print_this) > field->len) - print_this[field->len-1] = '+'; - - printf("%-*.*s ", field->len, field->len, print_this); + memcpy(&temp_char, value, field->len); + + if(strlen(value) > field->len) + temp_char[field->len-1] = '+'; + print_this = temp_char; } - break; - default: - if(print_fields_parsable_print) - printf("%s|", "n/a"); - else - printf("%-*s ", field->len, "n/a"); - break; + printf("%-*.*s ", field->len, field->len, print_this); } } -extern void print_fields_uint32(type_t type, print_field_t *field, - uint32_t value) +extern void print_fields_uint32(print_field_t *field, uint32_t value) { - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); - else - printf("%*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%*s ", field->len, " "); - } else { - if(print_fields_parsable_print) - printf("%u|", value); - else - printf("%*u ", field->len, value); - } - break; - default: + printf("|"); + else + printf("%*s ", field->len, " "); + } else { if(print_fields_parsable_print) - printf("%s|", "n/a"); + printf("%u|", value); else - printf("%*.*s ", field->len, field->len, "n/a"); - break; + printf("%*u ", field->len, value); } } -extern void print_fields_uint64(type_t type, print_field_t *field, - uint64_t value) +extern void print_fields_uint64(print_field_t *field, uint64_t value) { - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == 
INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); - else - printf("%*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%*s ", field->len, " "); - } else { - if(print_fields_parsable_print) - printf("%llu|", (long long unsigned) value); - else - printf("%*llu ", field->len, - (long long unsigned) value); - } - break; - default: + printf("|"); + else + printf("%*s ", field->len, " "); + } else { if(print_fields_parsable_print) - printf("%s|", "n/a"); + printf("%llu|", (long long unsigned) value); else - printf("%*.*s ", field->len, field->len, "n/a"); - break; + printf("%*llu ", field->len, + (long long unsigned) value); } } -extern void print_fields_time(type_t type, print_field_t *field, uint32_t value) +extern void print_fields_time(print_field_t *field, uint32_t value) { - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); + printf("|"); else - printf("%*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%*s ", field->len, " "); - } else { - char time_buf[32]; - mins2time_str((time_t) value, - time_buf, sizeof(time_buf)); - if(print_fields_parsable_print) - printf("%s|", time_buf); - else - printf("%*s ", field->len, time_buf); - } - break; - default: - printf("%*.*s ", field->len, field->len, "n/a"); - break; + printf("%*s ", field->len, " "); + } else { + char time_buf[32]; + mins2time_str((time_t) value, time_buf, sizeof(time_buf)); + if(print_fields_parsable_print) + printf("%s|", time_buf); + else + printf("%*s ", field->len, time_buf); } } -extern void print_fields_char_list(type_t type, print_field_t *field, - List value) +extern void print_fields_char_list(print_field_t *field, List value) { ListIterator itr = NULL; char *print_this = NULL; char *object = NULL; - switch(type) { - case SLURM_PRINT_HEADLINE: + if(!value || !list_count(value)) { if(print_fields_parsable_print) - printf("%s|", field->name); + print_this = xstrdup(""); else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - if(!value || !list_count(value)) { - if(print_fields_parsable_print) - print_this = xstrdup(""); - else - print_this = xstrdup(" "); - } else { - itr = list_iterator_create(value); - while((object = list_next(itr))) { - if(print_this) - xstrfmtcat(print_this, ",%s", object); - else - print_this = xstrdup(object); - } - list_iterator_destroy(itr); - } - - if(print_fields_parsable_print) - printf("%s|", print_this); - else { - if(strlen(print_this) > field->len) - print_this[field->len-1] = '+'; - - printf("%-*.*s ", 
field->len, field->len, print_this); + print_this = xstrdup(" "); + } else { + list_sort(value, (ListCmpF)_sort_char_list); + itr = list_iterator_create(value); + while((object = list_next(itr))) { + if(print_this) + xstrfmtcat(print_this, ",%s", object); + else + print_this = xstrdup(object); } - xfree(print_this); - break; - default: - if(print_fields_parsable_print) - printf("%s|", "n/a"); - else - printf("%-*s ", field->len, "n/a"); - break; + list_iterator_destroy(itr); + } + + if(print_fields_parsable_print) + printf("%s|", print_this); + else { + if(strlen(print_this) > field->len) + print_this[field->len-1] = '+'; + + printf("%-*.*s ", field->len, field->len, print_this); } + xfree(print_this); } diff --git a/src/common/print_fields.h b/src/common/print_fields.h index 9264970a6de7e45e94694af725607d669d0f0a82..37ade380f5e27b6327168cacc60d5ef60ec30bc3 100644 --- a/src/common/print_fields.h +++ b/src/common/print_fields.h @@ -67,12 +67,6 @@ #include "src/common/xstring.h" #include "src/common/list.h" -typedef enum { - SLURM_PRINT_HEADLINE, - SLURM_PRINT_UNDERSCORE, - SLURM_PRINT_VALUE -} type_t; - typedef struct { uint16_t len; /* what is the width of the print */ char *name; /* name to be printed in header */ @@ -85,16 +79,12 @@ extern int print_fields_have_header; extern void destroy_print_field(void *object); extern void print_fields_header(List print_fields_list); -extern void print_fields_date(void); -extern void print_fields_str(type_t type, print_field_t *field, char *value); -extern void print_fields_uint32(type_t type, print_field_t *field, - uint32_t value); -extern void print_fields_uint64(type_t type, print_field_t *field, - uint64_t value); -extern void print_fields_time(type_t type, print_field_t *field, - uint32_t value); -extern void print_fields_char_list(type_t type, print_field_t *field, - List value); +extern void print_fields_date(print_field_t *field, time_t value); +extern void print_fields_str(print_field_t *field, char *value); +extern void print_fields_uint32(print_field_t *field, uint32_t value); +extern void print_fields_uint64(print_field_t *field, uint64_t value); +extern void print_fields_time(print_field_t *field, uint32_t value); +extern void print_fields_char_list(print_field_t *field, List value); #define print_fields_uint print_fields_uint32 #endif diff --git a/src/common/read_config.c b/src/common/read_config.c index 724caa3e10bc88c9a36ef2ece883395a6efc2a77..2faba8f6ada731bf340e9ed266271dd23fdceed0 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -146,7 +146,9 @@ s_p_options_t slurm_conf_options[] = { {"DefaultStoragePort", S_P_UINT32}, {"DefaultStorageType", S_P_STRING}, {"DefaultStorageUser", S_P_STRING}, - {"DefMemPerTask", S_P_UINT32}, + {"DefMemPerCPU", S_P_UINT32}, + {"DefMemPerNode", S_P_UINT32}, + {"DefMemPerTask", S_P_UINT32}, /* defunct */ {"DisableRootJobs", S_P_BOOLEAN}, {"EnforcePartLimits", S_P_BOOLEAN}, {"Epilog", S_P_STRING}, @@ -179,14 +181,16 @@ s_p_options_t slurm_conf_options[] = { {"Licenses", S_P_STRING}, {"MailProg", S_P_STRING}, {"MaxJobCount", S_P_UINT16}, - {"MaxMemPerTask", S_P_UINT32}, + {"MaxMemPerCPU", S_P_UINT32}, + {"MaxMemPerNode", S_P_UINT32}, + {"MaxMemPerTask", S_P_UINT32}, /* defunct */ {"MessageTimeout", S_P_UINT16}, {"MinJobAge", S_P_UINT16}, {"MpichGmDirectSupport", S_P_LONG, defunct_option}, {"MpiDefault", S_P_STRING}, {"PluginDir", S_P_STRING}, {"PlugStackConfig", S_P_STRING}, - {"PrivateData", S_P_UINT16}, + {"PrivateData", S_P_STRING}, {"ProctrackType", S_P_STRING}, {"Prolog", 
S_P_STRING}, {"PropagatePrioProcess", S_P_UINT16}, @@ -1551,7 +1555,7 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) s_p_get_string(&default_storage_host, "DefaultStorageHost", hashtbl); s_p_get_string(&default_storage_user, "DefaultStorageUser", hashtbl); s_p_get_string(&default_storage_pass, "DefaultStoragePass", hashtbl); - s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl); + s_p_get_string(&default_storage_loc, "DefaultStorageLoc", hashtbl); s_p_get_uint32(&default_storage_port, "DefaultStoragePort", hashtbl); if (!s_p_get_string(&conf->job_credential_private_key, @@ -1577,8 +1581,11 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_string(&conf->crypto_type, "CryptoType", hashtbl)) conf->crypto_type = xstrdup(DEFAULT_CRYPTO_TYPE); - if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl)) - conf->def_mem_per_task = DEFAULT_MEM_PER_TASK; + if ((s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerCPU", hashtbl)) || + (s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerTask", hashtbl))) + conf->def_mem_per_task |= MEM_PER_CPU; + else if (!s_p_get_uint32(&conf->def_mem_per_task, "DefMemPerNode", hashtbl)) + conf->def_mem_per_task = DEFAULT_MEM_PER_CPU; if (!s_p_get_boolean((bool *) &conf->disable_root_jobs, "DisableRootJobs", hashtbl)) @@ -1586,7 +1593,7 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_boolean((bool *) &conf->enforce_part_limits, "EnforcePartLimits", hashtbl)) - conf->disable_root_jobs = DEFAULT_ENFORCE_PART_LIMITS; + conf->enforce_part_limits = DEFAULT_ENFORCE_PART_LIMITS; s_p_get_string(&conf->epilog, "Epilog", hashtbl); @@ -1640,11 +1647,21 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) xstrdup(DEFAULT_JOB_ACCT_GATHER_TYPE); if (!s_p_get_string(&conf->job_comp_type, "JobCompType", hashtbl)) { - if(default_storage_type) - conf->job_comp_type = - xstrdup_printf("jobcomp/%s", - default_storage_type); - else + if(default_storage_type) { + if(!strcasecmp("slurmdbd", default_storage_type)) { + error("Can not use the default storage type " + "specified for jobcomp since there is " + "not slurmdbd type. We are using %s " + "as the type. 
To disable this message " + "set JobCompType in your slurm.conf", + DEFAULT_JOB_COMP_TYPE); + conf->job_comp_type = + xstrdup(DEFAULT_JOB_COMP_TYPE); + } else + conf->job_comp_type = + xstrdup_printf("jobcomp/%s", + default_storage_type); + } else conf->job_comp_type = xstrdup(DEFAULT_JOB_COMP_TYPE); } if (!s_p_get_string(&conf->job_comp_loc, "JobCompLoc", hashtbl)) { @@ -1708,8 +1725,11 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_uint16(&conf->max_job_cnt, "MaxJobCount", hashtbl)) conf->max_job_cnt = DEFAULT_MAX_JOB_COUNT; - if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl)) - conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_TASK; + if ((s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerCPU", hashtbl)) || + (s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerTask", hashtbl))) + conf->max_mem_per_task |= MEM_PER_CPU; + else if (!s_p_get_uint32(&conf->max_mem_per_task, "MaxMemPerNode", hashtbl)) + conf->max_mem_per_task = DEFAULT_MAX_MEM_PER_CPU; if (!s_p_get_uint16(&conf->msg_timeout, "MessageTimeout", hashtbl)) conf->msg_timeout = DEFAULT_MSG_TIMEOUT; @@ -1807,7 +1827,17 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) && (!strcmp(conf->proctrack_type,"proctrack/linuxproc"))) fatal("proctrack/linuxproc is incompatable with switch/elan"); - s_p_get_uint16(&conf->private_data, "PrivateData", hashtbl); + if (s_p_get_string(&temp_str, "PrivateData", hashtbl)) { + if (strstr(temp_str, "job")) + conf->private_data |= PRIVATE_DATA_JOBS; + if (strstr(temp_str, "node")) + conf->private_data |= PRIVATE_DATA_NODES; + if (strstr(temp_str, "partition")) + conf->private_data |= PRIVATE_DATA_PARTITIONS; + if (strstr(temp_str, "all")) + conf->private_data = 0xffff; + xfree(temp_str); + } s_p_get_string(&conf->prolog, "Prolog", hashtbl); diff --git a/src/common/read_config.h b/src/common/read_config.h index cddc90068df4f3ff9030a6e939b2398279da16d7..c64361b4ccc6fcee48786a46883c865e225b4286 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -73,8 +73,8 @@ extern char *default_plugstack; #define DEFAULT_KILL_WAIT 30 #define DEFAULT_MAIL_PROG "/bin/mail" #define DEFAULT_MAX_JOB_COUNT 5000 -#define DEFAULT_MEM_PER_TASK 0 -#define DEFAULT_MAX_MEM_PER_TASK 0 +#define DEFAULT_MEM_PER_CPU 0 +#define DEFAULT_MAX_MEM_PER_CPU 0 #define DEFAULT_MIN_JOB_AGE 300 #define DEFAULT_MPI_DEFAULT "none" #define DEFAULT_MSG_TIMEOUT 10 diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index 597a7ee5befa4034ab7ce1367bb83061a0c4114c..6265ce45ab20deb7f55f8499d0e5a21b33c85447 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -65,44 +65,52 @@ typedef struct slurm_acct_storage_ops { List user_list); int (*add_coord) (void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); int (*add_accts) (void *db_conn, uint32_t uid, List acct_list); int (*add_clusters) (void *db_conn, uint32_t uid, List cluster_list); int (*add_associations) (void *db_conn, uint32_t uid, List association_list); + int (*add_qos) (void *db_conn, uint32_t uid, + List qos_list); List (*modify_users) (void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, + acct_user_cond_t *user_cond, acct_user_rec_t *user); List (*modify_accts) (void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct); List (*modify_clusters) (void *db_conn, uint32_t uid, - acct_cluster_cond_t 
*cluster_q, + acct_cluster_cond_t *cluster_cond, acct_cluster_rec_t *cluster); List (*modify_associations)(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, + acct_association_cond_t *assoc_cond, acct_association_rec_t *assoc); List (*remove_users) (void *db_conn, uint32_t uid, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); List (*remove_coord) (void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); List (*remove_accts) (void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); List (*remove_clusters) (void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q); + acct_cluster_cond_t *cluster_cond); List (*remove_associations)(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q); + acct_association_cond_t *assoc_cond); + List (*remove_qos) (void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond); List (*get_users) (void *db_conn, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); List (*get_accts) (void *db_conn, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); List (*get_clusters) (void *db_conn, - acct_cluster_cond_t *cluster_q); + acct_cluster_cond_t *cluster_cond); List (*get_associations) (void *db_conn, - acct_association_cond_t *assoc_q); + acct_association_cond_t *assoc_cond); + List (*get_qos) (void *db_conn, + acct_qos_cond_t *qos_cond); + List (*get_txn) (void *db_conn, + acct_txn_cond_t *txn_cond); int (*get_usage) (void *db_conn, void *acct_assoc, time_t start, @@ -160,7 +168,7 @@ typedef struct slurm_acct_storage_context { static slurm_acct_storage_context_t * g_acct_storage_context = NULL; static pthread_mutex_t g_acct_storage_context_lock = - PTHREAD_MUTEX_INITIALIZER; + PTHREAD_MUTEX_INITIALIZER; /* * Local functions @@ -190,6 +198,7 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops( "acct_storage_p_add_accts", "acct_storage_p_add_clusters", "acct_storage_p_add_associations", + "acct_storage_p_add_qos", "acct_storage_p_modify_users", "acct_storage_p_modify_accounts", "acct_storage_p_modify_clusters", @@ -199,10 +208,13 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops( "acct_storage_p_remove_accts", "acct_storage_p_remove_clusters", "acct_storage_p_remove_associations", + "acct_storage_p_remove_qos", "acct_storage_p_get_users", "acct_storage_p_get_accts", "acct_storage_p_get_clusters", "acct_storage_p_get_associations", + "acct_storage_p_get_qos", + "acct_storage_p_get_txn", "acct_storage_p_get_usage", "acct_storage_p_roll_usage", "clusteracct_storage_p_node_down", @@ -254,7 +266,7 @@ static slurm_acct_storage_ops_t * _acct_storage_get_ops( c->acct_storage_type ); if ( c->cur_plugin == PLUGIN_INVALID_HANDLE ) { error( "cannot find accounting_storage plugin for %s", - c->acct_storage_type ); + c->acct_storage_type ); return NULL; } @@ -326,6 +338,8 @@ extern void destroy_acct_user_rec(void *object) list_destroy(acct_user->coord_accts); xfree(acct_user->default_acct); xfree(acct_user->name); + if(acct_user->qos_list) + list_destroy(acct_user->qos_list); xfree(acct_user); } } @@ -343,6 +357,8 @@ extern void destroy_acct_account_rec(void *object) xfree(acct_account->description); xfree(acct_account->name); xfree(acct_account->organization); + if(acct_account->qos_list) + list_destroy(acct_account->qos_list); xfree(acct_account); } } @@ -409,6 +425,27 @@ extern void destroy_acct_association_rec(void *object) } } +extern void destroy_acct_qos_rec(void *object) +{ + acct_qos_rec_t *acct_qos = 
(acct_qos_rec_t *)object; + if(acct_qos) { + xfree(acct_qos->description); + xfree(acct_qos->name); + xfree(acct_qos); + } +} + +extern void destroy_acct_txn_rec(void *object) +{ + acct_txn_rec_t *acct_txn = (acct_txn_rec_t *)object; + if(acct_txn) { + xfree(acct_txn->actor_name); + xfree(acct_txn->set_info); + xfree(acct_txn->where_query); + xfree(acct_txn); + } +} + extern void destroy_acct_user_cond(void *object) { acct_user_cond_t *acct_user = (acct_user_cond_t *)object; @@ -417,8 +454,8 @@ extern void destroy_acct_user_cond(void *object) destroy_acct_association_cond(acct_user->assoc_cond); if(acct_user->def_acct_list) list_destroy(acct_user->def_acct_list); - if(acct_user->user_list) - list_destroy(acct_user->user_list); + if(acct_user->qos_list) + list_destroy(acct_user->qos_list); xfree(acct_user); } } @@ -429,13 +466,13 @@ extern void destroy_acct_account_cond(void *object) (acct_account_cond_t *)object; if(acct_account) { - if(acct_account->acct_list) - list_destroy(acct_account->acct_list); destroy_acct_association_cond(acct_account->assoc_cond); if(acct_account->description_list) list_destroy(acct_account->description_list); if(acct_account->organization_list) list_destroy(acct_account->organization_list); + if(acct_account->qos_list) + list_destroy(acct_account->qos_list); xfree(acct_account); } } @@ -491,12 +528,40 @@ extern void destroy_acct_job_cond(void *object) list_destroy(job_cond->partition_list); if(job_cond->step_list) list_destroy(job_cond->step_list); - if(job_cond->user_list) - list_destroy(job_cond->user_list); + if(job_cond->state_list) + list_destroy(job_cond->state_list); + if(job_cond->userid_list) + list_destroy(job_cond->userid_list); xfree(job_cond); } } +extern void destroy_acct_qos_cond(void *object) +{ + acct_qos_cond_t *acct_qos = (acct_qos_cond_t *)object; + if(acct_qos) { + if(acct_qos->id_list) + list_destroy(acct_qos->id_list); + if(acct_qos->name_list) + list_destroy(acct_qos->name_list); + xfree(acct_qos); + } +} + +extern void destroy_acct_txn_cond(void *object) +{ + acct_txn_cond_t *acct_txn = (acct_txn_cond_t *)object; + if(acct_txn) { + if(acct_txn->action_list) + list_destroy(acct_txn->action_list); + if(acct_txn->actor_list) + list_destroy(acct_txn->actor_list); + if(acct_txn->id_list) + list_destroy(acct_txn->id_list); + xfree(acct_txn); + } +} + extern void destroy_acct_update_object(void *object) { acct_update_object_t *acct_update = @@ -524,6 +589,7 @@ extern void pack_acct_user_rec(void *in, Buf buffer) uint32_t count = 0; acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; + char *tmp_info = NULL; if(!object) { pack16(0, buffer); @@ -531,7 +597,7 @@ extern void pack_acct_user_rec(void *in, Buf buffer) pack32(0, buffer); packnull(buffer); packnull(buffer); - pack16(0, buffer); + pack32(0, buffer); pack32(0, buffer); return; } @@ -565,7 +631,20 @@ extern void pack_acct_user_rec(void *in, Buf buffer) packstr(object->default_acct, buffer); packstr(object->name, buffer); - pack16((uint16_t)object->qos, buffer); + + if(object->qos_list) + count = list_count(object->qos_list); + + pack32(count, buffer); + + if(count) { + itr = list_iterator_create(object->qos_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; pack32(object->uid, buffer); } @@ -577,6 +656,7 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; int i; + char *tmp_info = NULL; *object = object_ptr; 
safe_unpack16((uint16_t *)&object_ptr->admin_level, buffer); @@ -603,7 +683,14 @@ extern int unpack_acct_user_rec(void **object, Buf buffer) } safe_unpackstr_xmalloc(&object_ptr->default_acct, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); + safe_unpack32(&count, buffer); + if(count) { + object_ptr->qos_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->qos_list, tmp_info); + } + } safe_unpack32(&object_ptr->uid, buffer); return SLURM_SUCCESS; @@ -614,11 +701,11 @@ unpack_error: return SLURM_ERROR; } -extern void pack_update_shares_used(void *object, Buf buffer) +extern void pack_update_shares_used(void *in, Buf buffer) { - shares_used_object_t *object_ptr = (shares_used_object_t *) object; - pack32(object_ptr->assoc_id, buffer); - pack32(object_ptr->shares_used, buffer); + shares_used_object_t *object = (shares_used_object_t *)in; + pack32(object->assoc_id, buffer); + pack32(object->shares_used, buffer); } extern int unpack_update_shares_used(void **object, Buf buffer) @@ -643,6 +730,7 @@ extern void pack_acct_account_rec(void *in, Buf buffer) uint32_t count = 0; acct_account_rec_t *object = (acct_account_rec_t *)in; acct_association_rec_t *assoc = NULL; + char *tmp_info = NULL; if(!object) { pack32(0, buffer); @@ -650,7 +738,7 @@ extern void pack_acct_account_rec(void *in, Buf buffer) packnull(buffer); packnull(buffer); packnull(buffer); - pack16(0, buffer); + pack32(0, buffer); return; } @@ -683,7 +771,20 @@ extern void pack_acct_account_rec(void *in, Buf buffer) packstr(object->description, buffer); packstr(object->name, buffer); packstr(object->organization, buffer); - pack16((uint16_t)object->qos, buffer); + + if(object->qos_list) + count = list_count(object->qos_list); + + pack32(count, buffer); + + if(count) { + itr = list_iterator_create(object->qos_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; } extern int unpack_acct_account_rec(void **object, Buf buffer) @@ -694,6 +795,7 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) acct_coord_rec_t *coord = NULL; acct_association_rec_t *assoc = NULL; acct_account_rec_t *object_ptr = xmalloc(sizeof(acct_account_rec_t)); + char *tmp_info = NULL; *object = object_ptr; @@ -721,7 +823,14 @@ extern int unpack_acct_account_rec(void **object, Buf buffer) safe_unpackstr_xmalloc(&object_ptr->description, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&object_ptr->organization, &uint32_tmp, buffer); - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); + safe_unpack32(&count, buffer); + if(count) { + object_ptr->qos_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->qos_list, tmp_info); + } + } return SLURM_SUCCESS; @@ -1031,6 +1140,81 @@ unpack_error: return SLURM_ERROR; } +extern void pack_acct_qos_rec(void *in, Buf buffer) +{ + acct_qos_rec_t *object = (acct_qos_rec_t *)in; + if(!object) { + packnull(buffer); + pack32(0, buffer); + packnull(buffer); + return; + } + packstr(object->description, buffer); + pack32(object->id, buffer); + packstr(object->name, buffer); +} + +extern int unpack_acct_qos_rec(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + acct_qos_rec_t *object_ptr = 
xmalloc(sizeof(acct_qos_rec_t)); + + *object = object_ptr; + safe_unpackstr_xmalloc(&object_ptr->description, &uint32_tmp, buffer); + safe_unpack32(&object_ptr->id, buffer); + safe_unpackstr_xmalloc(&object_ptr->name, &uint32_tmp, buffer); + + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_qos_rec(object_ptr); + *object = NULL; + return SLURM_ERROR; +} + +extern void pack_acct_txn_rec(void *in, Buf buffer) +{ + acct_txn_rec_t *object = (acct_txn_rec_t *)in; + if(!object) { + pack16(0, buffer); + packnull(buffer); + pack32(0, buffer); + packnull(buffer); + pack_time(0, buffer); + packnull(buffer); + return; + } + pack16(object->action, buffer); + packstr(object->actor_name, buffer); + pack32(object->id, buffer); + packstr(object->set_info, buffer); + pack_time(object->timestamp, buffer); + packstr(object->where_query, buffer); +} + +extern int unpack_acct_txn_rec(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + acct_txn_rec_t *object_ptr = xmalloc(sizeof(acct_txn_rec_t)); + + *object = object_ptr; + + safe_unpack16(&object_ptr->action, buffer); + safe_unpackstr_xmalloc(&object_ptr->actor_name, &uint32_tmp, buffer); + safe_unpack32(&object_ptr->id, buffer); + safe_unpackstr_xmalloc(&object_ptr->set_info, &uint32_tmp, buffer); + safe_unpack_time(&object_ptr->timestamp, buffer); + safe_unpackstr_xmalloc(&object_ptr->where_query, &uint32_tmp, buffer); + + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_txn_rec(object_ptr); + *object = NULL; + return SLURM_ERROR; + +} + extern void pack_acct_user_cond(void *in, Buf buffer) { char *tmp_info = NULL; @@ -1042,7 +1226,6 @@ extern void pack_acct_user_cond(void *in, Buf buffer) pack16(0, buffer); pack_acct_association_cond(NULL, buffer); pack32(0, buffer); - pack16(0, buffer); pack32(0, buffer); pack16(0, buffer); pack16(0, buffer); @@ -1068,20 +1251,20 @@ extern void pack_acct_user_cond(void *in, Buf buffer) } count = 0; - pack16((uint16_t)object->qos, buffer); - - if(object->user_list) - count = list_count(object->user_list); + if(object->qos_list) + count = list_count(object->qos_list); pack32(count, buffer); if(count) { - itr = list_iterator_create(object->user_list); + itr = list_iterator_create(object->qos_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } + count = 0; + pack16((uint16_t)object->with_assocs, buffer); pack16((uint16_t)object->with_coords, buffer); pack16((uint16_t)object->with_deleted, buffer); @@ -1112,13 +1295,12 @@ extern int unpack_acct_user_cond(void **object, Buf buffer) list_append(object_ptr->def_acct_list, tmp_info); } } - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); safe_unpack32(&count, buffer); if(count) { - object_ptr->user_list = list_create(slurm_destroy_char); + object_ptr->qos_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); - list_append(object_ptr->user_list, tmp_info); + list_append(object_ptr->qos_list, tmp_info); } } safe_unpack16((uint16_t *)&object_ptr->with_assocs, buffer); @@ -1141,29 +1323,15 @@ extern void pack_acct_account_cond(void *in, Buf buffer) uint32_t count = 0; if(!object) { - pack32(0, buffer); pack_acct_association_cond(NULL, buffer); pack32(0, buffer); pack32(0, buffer); - pack16(0, buffer); + pack32(0, buffer); pack16(0, buffer); pack16(0, buffer); pack16(0, buffer); return; } - if(object->acct_list) - count = list_count(object->acct_list); - - pack32(count, buffer); - - if(count) { - itr = list_iterator_create(object->acct_list); - 
while((tmp_info = list_next(itr))) { - packstr(tmp_info, buffer); - } - list_iterator_destroy(itr); - } - pack_acct_association_cond(object->assoc_cond, buffer); count = 0; @@ -1192,8 +1360,23 @@ extern void pack_acct_account_cond(void *in, Buf buffer) packstr(tmp_info, buffer); } list_iterator_destroy(itr); + count = 0; + } + + if(object->qos_list) + count = list_count(object->qos_list); + + pack32(count, buffer); + + if(count) { + itr = list_iterator_create(object->qos_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + count = 0; } - pack16((uint16_t)object->qos, buffer); + pack16((uint16_t)object->with_assocs, buffer); pack16((uint16_t)object->with_coords, buffer); pack16((uint16_t)object->with_deleted, buffer); @@ -1208,15 +1391,6 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) char *tmp_info = NULL; *object = object_ptr; - safe_unpack32(&count, buffer); - if(count) { - object_ptr->acct_list = list_create(slurm_destroy_char); - for(i=0; i<count; i++) { - safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); - list_append(object_ptr->acct_list, tmp_info); - } - } - if(unpack_acct_association_cond((void **)&object_ptr->assoc_cond, buffer) == SLURM_ERROR) goto unpack_error; @@ -1237,7 +1411,14 @@ extern int unpack_acct_account_cond(void **object, Buf buffer) list_append(object_ptr->organization_list, tmp_info); } } - safe_unpack16((uint16_t *)&object_ptr->qos, buffer); + safe_unpack32(&count, buffer); + if(count) { + object_ptr->qos_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->qos_list, tmp_info); + } + } safe_unpack16((uint16_t *)&object_ptr->with_assocs, buffer); safe_unpack16((uint16_t *)&object_ptr->with_coords, buffer); safe_unpack16((uint16_t *)&object_ptr->with_deleted, buffer); @@ -1520,12 +1701,15 @@ extern void pack_acct_job_cond(void *in, Buf buffer) pack32(0, buffer); pack32(0, buffer); pack16(0, buffer); + pack16(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); pack32(0, buffer); + pack32(0, buffer); + pack16(0, buffer); return; } @@ -1567,7 +1751,7 @@ extern void pack_acct_job_cond(void *in, Buf buffer) } count = 0; - pack16(object->completion, buffer); + pack16(object->duplicates, buffer); if(object->groupid_list) count = list_count(object->groupid_list); @@ -1607,21 +1791,36 @@ extern void pack_acct_job_cond(void *in, Buf buffer) } count = 0; + if(object->state_list) + count = list_count(object->state_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->state_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + pack32(object->usage_end, buffer); pack32(object->usage_start, buffer); - if(object->user_list) - count = list_count(object->user_list); + if(object->userid_list) + count = list_count(object->userid_list); pack32(count, buffer); if(count) { - itr = list_iterator_create(object->user_list); + itr = list_iterator_create(object->userid_list); while((tmp_info = list_next(itr))) { packstr(tmp_info, buffer); } list_iterator_destroy(itr); } count = 0; + + pack16(object->without_steps, buffer); } extern int unpack_acct_job_cond(void **object, Buf buffer) @@ -1661,7 +1860,7 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } } - safe_unpack16(&object_ptr->completion, buffer); + safe_unpack16(&object_ptr->duplicates, 
buffer); safe_unpack32(&count, buffer); if(count) { @@ -1692,18 +1891,29 @@ extern int unpack_acct_job_cond(void **object, Buf buffer) } } + safe_unpack32(&count, buffer); + if(count) { + object_ptr->state_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->state_list, tmp_info); + } + } + safe_unpack32(&object_ptr->usage_end, buffer); safe_unpack32(&object_ptr->usage_start, buffer); safe_unpack32(&count, buffer); if(count) { - object_ptr->user_list = list_create(slurm_destroy_char); + object_ptr->userid_list = list_create(slurm_destroy_char); for(i=0; i<count; i++) { safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); - list_append(object_ptr->user_list, tmp_info); + list_append(object_ptr->userid_list, tmp_info); } } + safe_unpack16(&object_ptr->without_steps, buffer); + return SLURM_SUCCESS; unpack_error: @@ -1712,6 +1922,215 @@ unpack_error: return SLURM_ERROR; } +extern void pack_acct_qos_cond(void *in, Buf buffer) +{ + uint32_t count = 0; + char *tmp_info = NULL; + ListIterator itr = NULL; + acct_qos_cond_t *object = (acct_qos_cond_t *)in; + + if(!object) { + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + pack16(0, buffer); + return; + } + + if(object->description_list) + count = list_count(object->description_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->description_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->id_list) + count = list_count(object->id_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->id_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->name_list) + count = list_count(object->name_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->name_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + pack16(object->with_deleted, buffer); +} + +extern int unpack_acct_qos_cond(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + int i; + uint32_t count; + acct_qos_cond_t *object_ptr = xmalloc(sizeof(acct_qos_cond_t)); + char *tmp_info = NULL; + + *object = object_ptr; + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->description_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->description_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->id_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->id_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->name_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->name_list, tmp_info); + } + } + + safe_unpack16(&object_ptr->with_deleted, buffer); + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_qos_cond(object_ptr); + *object = NULL; + return SLURM_ERROR; +} + +extern void pack_acct_txn_cond(void *in, Buf buffer) +{ + uint32_t count = 0; + char *tmp_info = NULL; + ListIterator itr = NULL; + acct_txn_cond_t *object = (acct_txn_cond_t *)in; + + if(!object) { + pack32(0, 
buffer); + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + pack32(0, buffer); + return; + } + if(object->action_list) + count = list_count(object->action_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->action_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->actor_list) + count = list_count(object->actor_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->actor_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + if(object->id_list) + count = list_count(object->id_list); + + pack32(count, buffer); + if(count) { + itr = list_iterator_create(object->id_list); + while((tmp_info = list_next(itr))) { + packstr(tmp_info, buffer); + } + list_iterator_destroy(itr); + } + count = 0; + + pack32(object->time_end, buffer); + pack32(object->time_start, buffer); + +} + +extern int unpack_acct_txn_cond(void **object, Buf buffer) +{ + uint32_t uint32_tmp; + int i; + uint32_t count; + acct_txn_cond_t *object_ptr = xmalloc(sizeof(acct_txn_cond_t)); + char *tmp_info = NULL; + + *object = object_ptr; + safe_unpack32(&count, buffer); + if(count) { + object_ptr->action_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->action_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->actor_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->actor_list, tmp_info); + } + } + + safe_unpack32(&count, buffer); + if(count) { + object_ptr->id_list = list_create(slurm_destroy_char); + for(i=0; i<count; i++) { + safe_unpackstr_xmalloc(&tmp_info, &uint32_tmp, buffer); + list_append(object_ptr->id_list, tmp_info); + } + } + + safe_unpack32(&object_ptr->time_end, buffer); + safe_unpack32(&object_ptr->time_start, buffer); + + return SLURM_SUCCESS; + +unpack_error: + destroy_acct_txn_cond(object_ptr); + *object = NULL; + return SLURM_ERROR; +} + extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer) { uint32_t count = 0; @@ -1733,6 +2152,10 @@ extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer) case ACCT_REMOVE_ASSOC: my_function = pack_acct_association_rec; break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + my_function = pack_acct_qos_rec; + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in update_object: %d", object->type); @@ -1779,6 +2202,11 @@ extern int unpack_acct_update_object(acct_update_object_t **object, Buf buffer) my_function = unpack_acct_association_rec; my_destroy = destroy_acct_association_rec; break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + my_function = unpack_acct_qos_rec; + my_destroy = destroy_acct_qos_rec; + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in update_object: %d", @@ -1803,46 +2231,55 @@ unpack_error: return SLURM_ERROR; } -extern char *acct_qos_str(acct_qos_level_t level) +extern char *acct_qos_str(List qos_list, uint32_t level) { - switch(level) { - case ACCT_QOS_NOTSET: - return "Not Set"; - break; - case ACCT_QOS_NORMAL: - return "Normal"; - break; - case ACCT_QOS_EXPEDITE: - return "Expedite"; - break; - case ACCT_QOS_STANDBY: - return "Standby"; - break; - case ACCT_QOS_EXEMPT: - return "Exempt"; - break; - default: - 
return "Unknown"; - break; + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + + if(!qos_list) { + error("We need a qos list to translate"); + return NULL; + } else if(!level) { + debug2("no level"); + return "None"; } - return "Unknown"; + + itr = list_iterator_create(qos_list); + while((qos = list_next(itr))) { + if(level == qos->id) + break; + } + list_iterator_destroy(itr); + if(qos) + return qos->name; + else + return NULL; } -extern acct_qos_level_t str_2_acct_qos(char *level) +extern uint32_t str_2_acct_qos(List qos_list, char *level) { - if(!level) { - return ACCT_QOS_NOTSET; - } else if(!strncasecmp(level, "Normal", 1)) { - return ACCT_QOS_NORMAL; - } else if(!strncasecmp(level, "Expedite", 3)) { - return ACCT_QOS_EXPEDITE; - } else if(!strncasecmp(level, "Standby", 1)) { - return ACCT_QOS_STANDBY; - } else if(!strncasecmp(level, "Exempt", 3)) { - return ACCT_QOS_EXEMPT; - } else { - return ACCT_QOS_NOTSET; + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + + if(!qos_list) { + error("We need a qos list to translate"); + return NO_VAL; + } else if(!level) { + debug2("no level"); + return 0; + } + + + itr = list_iterator_create(qos_list); + while((qos = list_next(itr))) { + if(!strncasecmp(level, qos->name, strlen(level))) + break; } + list_iterator_destroy(itr); + if(qos) + return qos->id; + else + return NO_VAL; } extern char *acct_admin_level_str(acct_admin_level_t level) @@ -1892,12 +2329,12 @@ extern void log_assoc_rec(acct_association_rec_t *assoc_ptr) debug(" fairshare : NONE"); else debug(" fairshare : %u", - assoc_ptr->fairshare); + assoc_ptr->fairshare); if(assoc_ptr->max_cpu_secs_per_job == INFINITE) debug(" max_cpu_secs_per_job : NONE"); else debug(" max_cpu_secs_per_job : %d", - assoc_ptr->max_cpu_secs_per_job); + assoc_ptr->max_cpu_secs_per_job); if(assoc_ptr->max_jobs == INFINITE) debug(" max_jobs : NONE"); else @@ -1906,12 +2343,12 @@ extern void log_assoc_rec(acct_association_rec_t *assoc_ptr) debug(" max_nodes_per_job : NONE"); else debug(" max_nodes_per_job : %d", - assoc_ptr->max_nodes_per_job); + assoc_ptr->max_nodes_per_job); if(assoc_ptr->max_wall_duration_per_job == INFINITE) debug(" max_wall_duration_per_job : NONE"); else debug(" max_wall_duration_per_job : %d", - assoc_ptr->max_wall_duration_per_job); + assoc_ptr->max_wall_duration_per_job); debug(" parent_acct : %s", assoc_ptr->parent_acct); debug(" partition : %s", assoc_ptr->partition); debug(" user : %s(%u)", @@ -2005,12 +2442,12 @@ extern int acct_storage_g_add_users(void *db_conn, uint32_t uid, } extern int acct_storage_g_add_coord(void *db_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; return (*(g_acct_storage_context->ops.add_coord)) - (db_conn, uid, acct_list, user_q); + (db_conn, uid, acct_list, user_cond); } extern int acct_storage_g_add_accounts(void *db_conn, uint32_t uid, @@ -2040,125 +2477,159 @@ extern int acct_storage_g_add_associations(void *db_conn, uint32_t uid, (db_conn, uid, association_list); } +extern int acct_storage_g_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + if (slurm_acct_storage_init(NULL) < 0) + return SLURM_ERROR; + return (*(g_acct_storage_context->ops.add_qos)) + (db_conn, uid, qos_list); +} + extern List acct_storage_g_modify_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, - acct_user_rec_t *user) + acct_user_cond_t *user_cond, + acct_user_rec_t *user) { if (slurm_acct_storage_init(NULL) < 0) return 
NULL; return (*(g_acct_storage_context->ops.modify_users)) - (db_conn, uid, user_q, user); + (db_conn, uid, user_cond, user); } extern List acct_storage_g_modify_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, - acct_account_rec_t *acct) + acct_account_cond_t *acct_cond, + acct_account_rec_t *acct) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_accts)) - (db_conn, uid, acct_q, acct); + (db_conn, uid, acct_cond, acct); } extern List acct_storage_g_modify_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, - acct_cluster_rec_t *cluster) + acct_cluster_cond_t *cluster_cond, + acct_cluster_rec_t *cluster) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_clusters)) - (db_conn, uid, cluster_q, cluster); + (db_conn, uid, cluster_cond, cluster); } -extern List acct_storage_g_modify_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_g_modify_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.modify_associations)) - (db_conn, uid, assoc_q, assoc); + (db_conn, uid, assoc_cond, assoc); } extern List acct_storage_g_remove_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_users)) - (db_conn, uid, user_q); + (db_conn, uid, user_cond); } extern List acct_storage_g_remove_coord(void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_coord)) - (db_conn, uid, acct_list, user_q); + (db_conn, uid, acct_list, user_cond); } extern List acct_storage_g_remove_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_accts)) - (db_conn, uid, acct_q); + (db_conn, uid, acct_cond); } extern List acct_storage_g_remove_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_clusters)) - (db_conn, uid, cluster_q); + (db_conn, uid, cluster_cond); } -extern List acct_storage_g_remove_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_g_remove_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.remove_associations)) - (db_conn, uid, assoc_q); + (db_conn, uid, assoc_cond); +} + +extern List acct_storage_g_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + if (slurm_acct_storage_init(NULL) < 0) + return NULL; + return (*(g_acct_storage_context->ops.remove_qos)) + (db_conn, uid, qos_cond); } extern List acct_storage_g_get_users(void *db_conn, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; - return (*(g_acct_storage_context->ops.get_users))(db_conn, user_q); + return 
(*(g_acct_storage_context->ops.get_users))(db_conn, user_cond); } extern List acct_storage_g_get_accounts(void *db_conn, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_accts)) - (db_conn, acct_q); + (db_conn, acct_cond); } extern List acct_storage_g_get_clusters(void *db_conn, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_clusters)) - (db_conn, cluster_q); + (db_conn, cluster_cond); } extern List acct_storage_g_get_associations(void *db_conn, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_associations)) - (db_conn, assoc_q); + (db_conn, assoc_cond); +} + +extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond) +{ + if (slurm_acct_storage_init(NULL) < 0) + return NULL; + return (*(g_acct_storage_context->ops.get_qos))(db_conn, qos_cond); +} + +extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond) +{ + if (slurm_acct_storage_init(NULL) < 0) + return NULL; + return (*(g_acct_storage_context->ops.get_txn))(db_conn, txn_cond); } extern int acct_storage_g_get_usage(void *db_conn, @@ -2273,7 +2744,8 @@ extern int jobacct_storage_g_step_complete (void *db_conn, { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; - return (*(g_acct_storage_context->ops.step_complete))(db_conn, step_ptr); + return (*(g_acct_storage_context->ops.step_complete))(db_conn, + step_ptr); } /* diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index 43cac986a4829bc3edfa11465067d8f2fef8c6a9..f0b7bb3a79e5bad1d99a944cb2bdcb72c10dc033 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -53,14 +53,6 @@ typedef enum { ACCT_ADMIN_SUPER_USER } acct_admin_level_t; -typedef enum { - ACCT_QOS_NOTSET, - ACCT_QOS_NORMAL, - ACCT_QOS_EXPEDITE, - ACCT_QOS_STANDBY, - ACCT_QOS_EXEMPT -} acct_qos_level_t; - typedef enum { ACCT_UPDATE_NOTSET, ACCT_ADD_USER, @@ -70,7 +62,9 @@ typedef enum { ACCT_MODIFY_ASSOC, ACCT_REMOVE_USER, ACCT_REMOVE_ASSOC, - ACCT_REMOVE_COORD + ACCT_REMOVE_COORD, + ACCT_ADD_QOS, + ACCT_REMOVE_QOS } acct_update_type_t; /* Association conditions used for queries of the database */ @@ -100,11 +94,11 @@ typedef struct { } acct_association_cond_t; typedef struct { - List acct_list; /* list of char * */ - acct_association_cond_t *assoc_cond; + acct_association_cond_t *assoc_cond;/* use acct_list here for + names */ List description_list; /* list of char * */ List organization_list; /* list of char * */ - acct_qos_level_t qos; + List qos_list; /* list of char * */ uint16_t with_assocs; uint16_t with_coords; uint16_t with_deleted; @@ -116,7 +110,7 @@ typedef struct { char *description; char *name; char *organization; - acct_qos_level_t qos; + List qos_list /* list of char *'s */; } acct_account_rec_t; typedef struct { @@ -193,22 +187,36 @@ typedef struct { List acct_list; /* list of char * */ List associd_list; /* list of char */ List cluster_list; /* list of char * */ - uint16_t completion; /* get job completion records instead - * of accounting record */ + uint16_t duplicates; /* report duplicate job entries */ List groupid_list; /* list of char * */ List partition_list; /* list of char * */ List step_list; /* list of 
jobacct_selected_step_t */ + List state_list; /* list of char * */ uint32_t usage_end; uint32_t usage_start; - List user_list; /* list of char * */ + List userid_list; /* list of char * */ + uint16_t without_steps; /* don't give me step info */ } acct_job_cond_t; +typedef struct { + char *description; + uint32_t id; + char *name; +} acct_qos_rec_t; + +typedef struct { + List description_list; /* list of char * */ + List id_list; /* list of char * */ + List name_list; /* list of char * */ + uint16_t with_deleted; +} acct_qos_cond_t; + typedef struct { acct_admin_level_t admin_level; - acct_association_cond_t *assoc_cond; + acct_association_cond_t *assoc_cond; /* use user_list here for + names */ List def_acct_list; /* list of char * */ - acct_qos_level_t qos; - List user_list; /* list of char * */ + List qos_list; /* list of char * */ uint16_t with_assocs; uint16_t with_coords; uint16_t with_deleted; @@ -220,10 +228,27 @@ typedef struct { List coord_accts; /* list of acct_coord_rec_t *'s */ char *default_acct; char *name; - acct_qos_level_t qos; + List qos_list; /* list of char * */ uint32_t uid; } acct_user_rec_t; +typedef struct { + List action_list; /* list of char * */ + List actor_list; /* list of char * */ + List id_list; /* list of char * */ + uint32_t time_end; + uint32_t time_start; +} acct_txn_cond_t; + +typedef struct { + uint16_t action; + char *actor_name; + uint32_t id; + char *set_info; + time_t timestamp; + char *where_query; +} acct_txn_rec_t; + typedef struct { List objects; /* depending on type */ acct_update_type_t type; @@ -251,52 +276,64 @@ extern void destroy_cluster_accounting_rec(void *object); extern void destroy_acct_cluster_rec(void *object); extern void destroy_acct_accounting_rec(void *object); extern void destroy_acct_association_rec(void *object); +extern void destroy_acct_qos_rec(void *object); +extern void destroy_acct_txn_rec(void *object); extern void destroy_acct_user_cond(void *object); extern void destroy_acct_account_cond(void *object); extern void destroy_acct_cluster_cond(void *object); extern void destroy_acct_association_cond(void *object); extern void destroy_acct_job_cond(void *object); +extern void destroy_acct_qos_cond(void *object); +extern void destroy_acct_txn_cond(void *object); extern void destroy_acct_update_object(void *object); extern void destroy_update_shares_rec(void *object); /* pack functions */ -extern void pack_acct_user_rec(void *object, Buf buffer); +extern void pack_acct_user_rec(void *in, Buf buffer); extern int unpack_acct_user_rec(void **object, Buf buffer); -extern void pack_acct_account_rec(void *object, Buf buffer); +extern void pack_acct_account_rec(void *in, Buf buffer); extern int unpack_acct_account_rec(void **object, Buf buffer); -extern void pack_acct_coord_rec(void *object, Buf buffer); +extern void pack_acct_coord_rec(void *in, Buf buffer); extern int unpack_acct_coord_rec(void **object, Buf buffer); -extern void pack_cluster_accounting_rec(void *object, Buf buffer); +extern void pack_cluster_accounting_rec(void *in, Buf buffer); extern int unpack_cluster_accounting_rec(void **object, Buf buffer); -extern void pack_acct_cluster_rec(void *object, Buf buffer); +extern void pack_acct_cluster_rec(void *in, Buf buffer); extern int unpack_acct_cluster_rec(void **object, Buf buffer); -extern void pack_acct_accounting_rec(void *object, Buf buffer); +extern void pack_acct_accounting_rec(void *in, Buf buffer); extern int unpack_acct_accounting_rec(void **object, Buf buffer); -extern void pack_acct_association_rec(void 
*object, Buf buffer); +extern void pack_acct_association_rec(void *in, Buf buffer); extern int unpack_acct_association_rec(void **object, Buf buffer); +extern void pack_acct_qos_rec(void *in, Buf buffer); +extern int unpack_acct_qos_rec(void **object, Buf buffer); +extern void pack_acct_txn_rec(void *in, Buf buffer); +extern int unpack_acct_txn_rec(void **object, Buf buffer); -extern void pack_acct_user_cond(void *object, Buf buffer); +extern void pack_acct_user_cond(void *in, Buf buffer); extern int unpack_acct_user_cond(void **object, Buf buffer); -extern void pack_acct_account_cond(void *object, Buf buffer); +extern void pack_acct_account_cond(void *in, Buf buffer); extern int unpack_acct_account_cond(void **object, Buf buffer); -extern void pack_acct_cluster_cond(void *object, Buf buffer); +extern void pack_acct_cluster_cond(void *in, Buf buffer); extern int unpack_acct_cluster_cond(void **object, Buf buffer); -extern void pack_acct_association_cond(void *object, Buf buffer); +extern void pack_acct_association_cond(void *in, Buf buffer); extern int unpack_acct_association_cond(void **object, Buf buffer); -extern void pack_acct_job_cond(void *object, Buf buffer); +extern void pack_acct_job_cond(void *in, Buf buffer); extern int unpack_acct_job_cond(void **object, Buf buffer); +extern void pack_acct_qos_cond(void *in, Buf buffer); +extern int unpack_acct_qos_cond(void **object, Buf buffer); +extern void pack_acct_txn_cond(void *in, Buf buffer); +extern int unpack_acct_txn_cond(void **object, Buf buffer); extern void pack_acct_update_object(acct_update_object_t *object, Buf buffer); extern int unpack_acct_update_object(acct_update_object_t **object, Buf buffer); -extern void pack_update_shares_used(void *object, Buf buffer); +extern void pack_update_shares_used(void *in, Buf buffer); extern int unpack_update_shares_used(void **object, Buf buffer); -extern char *acct_qos_str(acct_qos_level_t level); -extern acct_qos_level_t str_2_acct_qos(char *level); +extern char *acct_qos_str(List qos_list, uint32_t level); +extern uint32_t str_2_acct_qos(List qos_list, char *level); extern char *acct_admin_level_str(acct_admin_level_t level); extern acct_admin_level_t str_2_acct_admin_level(char *level); @@ -340,11 +377,12 @@ extern int acct_storage_g_add_users(void *db_conn, uint32_t uid, /* * add users as account coordinators * IN: acct_list list of char *'s of names of accounts - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * RET: SLURM_SUCCESS on success SLURM_ERROR else */ extern int acct_storage_g_add_coord(void *db_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q); + List acct_list, + acct_user_cond_t *user_cond); /* @@ -364,94 +402,111 @@ extern int acct_storage_g_add_clusters(void *db_conn, uint32_t uid, List cluster_list); /* - * add accts to accounting system + * add associations to accounting system * IN: association_list List of acct_association_rec_t * * RET: SLURM_SUCCESS on success SLURM_ERROR else */ extern int acct_storage_g_add_associations(void *db_conn, uint32_t uid, List association_list); +/* + * add qos's to accounting system + * IN: qos_list List of char * + * RET: SLURM_SUCCESS on success SLURM_ERROR else + */ +extern int acct_storage_g_add_qos(void *db_conn, uint32_t uid, + List qos_list); + /* * modify existing users in the accounting system - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * IN: acct_user_rec_t *user * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_modify_users(void 
*db_conn, uint32_t uid, - acct_user_cond_t *user_q, + acct_user_cond_t *user_cond, acct_user_rec_t *user); /* * modify existing accounts in the accounting system - * IN: acct_acct_cond_t *acct_q + * IN: acct_acct_cond_t *acct_cond * IN: acct_account_rec_t *acct * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_modify_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct); /* * modify existing clusters in the accounting system - * IN: acct_cluster_cond_t *cluster_q + * IN: acct_cluster_cond_t *cluster_cond * IN: acct_cluster_rec_t *cluster * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_modify_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, + acct_cluster_cond_t *cluster_cond, acct_cluster_rec_t *cluster); /* * modify existing associations in the accounting system - * IN: acct_association_cond_t *assoc_q + * IN: acct_association_cond_t *assoc_cond * IN: acct_association_rec_t *assoc * RET: List containing (char *'s) else NULL on error */ -extern List acct_storage_g_modify_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc); +extern List acct_storage_g_modify_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc); /* * remove users from accounting system - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); /* * remove users from being a coordinator of an account * IN: acct_list list of char *'s of names of accounts - * IN: acct_user_cond_t *user_q + * IN: acct_user_cond_t *user_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_coord(void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); /* * remove accounts from accounting system - * IN: acct_account_cond_t *acct_q + * IN: acct_account_cond_t *acct_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); /* * remove clusters from accounting system - * IN: acct_cluster_cond_t *cluster_q + * IN: acct_cluster_cond_t *cluster_cond * RET: List containing (char *'s) else NULL on error */ extern List acct_storage_g_remove_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q); + acct_cluster_cond_t *cluster_cond); /* * remove associations from accounting system - * IN: acct_association_cond_t *assoc_q + * IN: acct_association_cond_t *assoc_cond * RET: List containing (char *'s) else NULL on error */ -extern List acct_storage_g_remove_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q); +extern List acct_storage_g_remove_associations( + void *db_conn, uint32_t uid, acct_association_cond_t *assoc_cond); + +/* + * remove qos from accounting system + * IN: acct_qos_cond_t *assoc_qos + * RET: List containing (char *'s) else NULL on error + */ +extern List acct_storage_g_remove_qos( + void *db_conn, uint32_t uid, acct_qos_cond_t *qos_cond); /* * get info from the storage @@ -461,7 +516,7 @@ extern List acct_storage_g_remove_associations(void *db_conn, uint32_t uid, * note List needs to be freed 
when called */ extern List acct_storage_g_get_users(void *db_conn, - acct_user_cond_t *user_q); + acct_user_cond_t *user_cond); /* * get info from the storage @@ -471,7 +526,7 @@ extern List acct_storage_g_get_users(void *db_conn, * note List needs to be freed when called */ extern List acct_storage_g_get_accounts(void *db_conn, - acct_account_cond_t *acct_q); + acct_account_cond_t *acct_cond); /* * get info from the storage @@ -480,8 +535,8 @@ extern List acct_storage_g_get_accounts(void *db_conn, * returns List of acct_cluster_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_clusters(void *db_conn, - acct_cluster_cond_t *cluster_q); +extern List acct_storage_g_get_clusters( + void *db_conn, acct_cluster_cond_t *cluster_cond); /* * get info from the storage @@ -489,8 +544,25 @@ extern List acct_storage_g_get_clusters(void *db_conn, * RET: List of acct_association_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_associations(void *db_conn, - acct_association_cond_t *assoc_q); +extern List acct_storage_g_get_associations( + void *db_conn, acct_association_cond_t *assoc_cond); + + +/* + * get info from the storage + * IN: acct_qos_cond_t * + * RET: List of acct_qos_rec_t * + * note List needs to be freed when called + */ +extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond); + +/* + * get info from the storage + * IN: acct_txn_cond_t * + * RET: List of acct_txn_rec_t * + * note List needs to be freed when called + */ +extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond); /* * get info from the storage diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c index 4373f507e42ddd7c35651c5783dbe49ef13d70a0..adc3a68b6b796136410ee794e5019a41486ad66b 100644 --- a/src/common/slurm_cred.c +++ b/src/common/slurm_cred.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/slurm_cred.c - SLURM job credential functions - * $Id: slurm_cred.c 14208 2008-06-06 19:15:24Z da $ + * $Id: slurm_cred.c 14499 2008-07-11 22:54:48Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
@@ -721,6 +721,40 @@ slurm_cred_faker(slurm_cred_arg_t *arg) } +void slurm_cred_free_args(slurm_cred_arg_t *arg) +{ + xfree(arg->hostlist); + xfree(arg->alloc_lps); + arg->alloc_lps_cnt = 0; +} + +int +slurm_cred_get_args(slurm_cred_t cred, slurm_cred_arg_t *arg) +{ + xassert(cred != NULL); + xassert(arg != NULL); + + /* + * set arguments to cred contents + */ + slurm_mutex_lock(&cred->mutex); + arg->jobid = cred->jobid; + arg->stepid = cred->stepid; + arg->uid = cred->uid; + arg->job_mem = cred->job_mem; + arg->task_mem = cred->task_mem; + arg->hostlist = xstrdup(cred->nodes); + arg->alloc_lps_cnt = cred->alloc_lps_cnt; + if (arg->alloc_lps_cnt > 0) { + arg->alloc_lps = xmalloc(arg->alloc_lps_cnt * sizeof(uint32_t)); + memcpy(arg->alloc_lps, cred->alloc_lps, + arg->alloc_lps_cnt * sizeof(uint32_t)); + } else + arg->alloc_lps = NULL; + slurm_mutex_unlock(&cred->mutex); + + return SLURM_SUCCESS; +} int slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, @@ -775,13 +809,13 @@ slurm_cred_verify(slurm_cred_ctx_t ctx, slurm_cred_t cred, arg->job_mem = cred->job_mem; arg->task_mem = cred->task_mem; arg->hostlist = xstrdup(cred->nodes); - arg->alloc_lps_cnt = cred->alloc_lps_cnt; - arg->alloc_lps = NULL; - if (arg->alloc_lps_cnt > 0) { - arg->alloc_lps = xmalloc(arg->alloc_lps_cnt * sizeof(uint32_t)); - memcpy(arg->alloc_lps, cred->alloc_lps, - arg->alloc_lps_cnt * sizeof(uint32_t)); - } + arg->alloc_lps_cnt = cred->alloc_lps_cnt; + if (arg->alloc_lps_cnt > 0) { + arg->alloc_lps = xmalloc(arg->alloc_lps_cnt * sizeof(uint32_t)); + memcpy(arg->alloc_lps, cred->alloc_lps, + arg->alloc_lps_cnt * sizeof(uint32_t)); + } else + arg->alloc_lps = NULL; slurm_mutex_unlock(&cred->mutex); diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h index 9fb47d6abf5c17ab1cb72be0c805d5ae01a900cf..01ce80550a0d71d80db401ae398abd962928e0db 100644 --- a/src/common/slurm_cred.h +++ b/src/common/slurm_cred.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/slurm_cred.h - SLURM job credential operations - * $Id: slurm_cred.h 14148 2008-05-28 23:35:40Z jette $ + * $Id: slurm_cred.h 14499 2008-07-11 22:54:48Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -165,6 +165,13 @@ slurm_cred_t slurm_cred_copy(slurm_cred_t cred); */ slurm_cred_t slurm_cred_faker(slurm_cred_arg_t *arg); +/* Free the credential arguments as loaded by either + * slurm_cred_get_args() or slurm_cred_verify() */ +void slurm_cred_free_args(slurm_cred_arg_t *arg); + +/* Make a copy of the credential's arguements */ +int slurm_cred_get_args(slurm_cred_t cred, slurm_cred_arg_t *arg); + /* * Verify the signed credential `cred,' and return cred contents in * the cred_arg structure. The credential is cached and cannot be reused. 
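A minimal caller sketch for the slurm_cred_get_args()/slurm_cred_free_args() pair added above; the structure fields are the ones copied by the new code, while the function name print_cred_args() is hypothetical.

#include "src/common/log.h"
#include "src/common/slurm_cred.h"

static void print_cred_args(slurm_cred_t cred)
{
	slurm_cred_arg_t arg;

	if (slurm_cred_get_args(cred, &arg) != SLURM_SUCCESS)
		return;

	/* arg now holds copies of the credential's contents */
	info("job %u step %u: job_mem=%u on nodes %s",
	     (unsigned) arg.jobid, (unsigned) arg.stepid,
	     (unsigned) arg.job_mem, arg.hostlist);

	/* releases the copied hostlist and alloc_lps array */
	slurm_cred_free_args(&arg);
}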
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index c513b3f8b6dde3dbcde4aa37378a3e8729c6c7ff..e7674c090d2de6e42d03c361dc67d464bf9cdc3a 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -69,6 +69,7 @@ static void _slurm_free_partition_info_members (partition_info_t * part); static void _free_all_step_info (job_step_info_response_msg_t *msg); static void _slurm_free_job_step_info_members (job_step_info_t * msg); +static void _make_lower(char *change); /* * slurm_msg_t_init - initialize a slurm message @@ -110,6 +111,84 @@ extern void slurm_destroy_char(void *object) xfree(tmp); } +/* returns number of objects added to list */ +extern int slurm_addto_char_list(List char_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + //info("got %s %d", name, i-start); + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + _make_lower(name); + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. 
It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + _make_lower(name); + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} void slurm_free_last_update_msg(last_update_msg_t * msg) { @@ -637,6 +716,30 @@ void inline slurm_free_will_run_response_msg(will_run_response_msg_t *msg) } } +extern void +private_data_string(uint16_t private_data, char *str, int str_len) +{ + if (str_len > 0) + str[0] = '\0'; + if (str_len < 22) { + error("private_data_string: output buffer too small"); + return; + } + + if (private_data & PRIVATE_DATA_JOBS) + strcat(str, "jobs"); + if (private_data & PRIVATE_DATA_NODES) { + if (str[0]) + strcat(str, ","); + strcat(str, "nodes"); + } + if (private_data & PRIVATE_DATA_PARTITIONS) { + if (str[0]) + strcat(str, ","); + strcat(str, "partitions"); + } +} + char *job_state_string(enum job_states inx) { if (inx & JOB_COMPLETING) @@ -1254,6 +1357,8 @@ extern int slurm_free_msg_data(slurm_msg_type_t type, void *data) slurm_free_suspend_msg(data); break; case REQUEST_JOB_READY: + case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_INFO_SINGLE: slurm_free_job_id_msg(data); break; case REQUEST_NODE_SELECT_INFO: @@ -1371,3 +1476,18 @@ void inline slurm_free_job_notify_msg(job_notify_msg_t * msg) xfree(msg); } } + +/* make everything lowercase should not be called on static char *'s */ +static void _make_lower(char *change) +{ + if(change) { + int j = 0; + while(change[j]) { + char lower = tolower(change[j]); + if(lower != change[j]) + change[j] = lower; + j++; + } + } +} + diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h index 799f83ea8e7cd8eaab16b8e724b943b88dee6533..d6ed3e7ed784bf46be15bd104641f14bbcefbe7e 100644 --- a/src/common/slurm_protocol_defs.h +++ b/src/common/slurm_protocol_defs.h @@ -1,9 +1,8 @@ /****************************************************************************\ * slurm_protocol_defs.h - definitions used for RPCs - * - * $Id: slurm_protocol_defs.h 13755 2008-04-01 19:12:53Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Kevin Tew <tew1@llnl.gov>. * LLNL-CODE-402394. 
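A short sketch exercising the two helpers added to slurm_protocol_defs.c above, slurm_addto_char_list() and private_data_string(); the wrapper name demo_protocol_helpers() is hypothetical and the expected results in the comments follow the implementations shown.

#include "src/common/list.h"
#include "src/common/log.h"
#include "src/common/slurm_protocol_defs.h"

static void demo_protocol_helpers(void)
{
	List names = list_create(slurm_destroy_char);
	char buf[64];
	int cnt;

	/* Comma-separated (optionally quoted) names are lower-cased and
	 * de-duplicated as they are appended to the list. */
	cnt = slurm_addto_char_list(names, "Alpha,beta,ALPHA");
	info("added %d unique names", cnt);	/* 2: alpha, beta */

	/* Render the PrivateData flags as a comma-separated string;
	 * the output buffer must be at least 22 bytes long. */
	private_data_string(PRIVATE_DATA_JOBS | PRIVATE_DATA_NODES,
			    buf, sizeof(buf));
	info("PrivateData = %s", buf);		/* "jobs,nodes" */

	list_destroy(names);
}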
@@ -117,6 +116,7 @@ typedef enum { REQUEST_TRIGGER_GET, REQUEST_TRIGGER_CLEAR, RESPONSE_TRIGGER_GET, + REQUEST_JOB_INFO_SINGLE, REQUEST_UPDATE_JOB = 3001, REQUEST_UPDATE_NODE, @@ -746,6 +746,7 @@ extern void slurm_msg_t_init (slurm_msg_t *msg); extern void slurm_msg_t_copy(slurm_msg_t *dest, slurm_msg_t *src); extern void slurm_destroy_char(void *object); +extern int slurm_addto_char_list(List char_list, char *names); /* free message functions */ void inline slurm_free_checkpoint_tasks_msg(checkpoint_tasks_msg_t * msg); @@ -852,14 +853,17 @@ extern char *job_state_string(enum job_states inx); extern char *job_state_string_compact(enum job_states inx); extern char *node_state_string(enum node_states inx); extern char *node_state_string_compact(enum node_states inx); +extern void private_data_string(uint16_t private_data, char *str, int str_len); #define safe_read(fd, buf, size) do { \ int remaining = size; \ char *ptr = (char *) buf; \ int rc; \ while (remaining > 0) { \ - rc = read(fd, ptr, remaining); \ - if (rc == 0) { \ + rc = read(fd, ptr, remaining); \ + if ((rc == 0) && (remaining == size)) \ + goto rwfail; \ + else if (rc == 0) { \ debug("%s:%d: %s: safe_read (%d of %d) EOF", \ __FILE__, __LINE__, __CURRENT_FUNC__, \ remaining, (int)size); \ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index a0abfea978279ba9d764c3792d2c996ccefe5b56..9be1aabca44cee3585049fb9847c7966b209ee7d 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -691,6 +691,7 @@ pack_msg(slurm_msg_t const *msg, Buf buffer) case REQUEST_JOB_READY: case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_INFO_SINGLE: _pack_job_ready_msg((job_id_msg_t *)msg->data, buffer); break; @@ -1040,6 +1041,7 @@ unpack_msg(slurm_msg_t * msg, Buf buffer) case REQUEST_JOB_READY: case REQUEST_JOB_REQUEUE: + case REQUEST_JOB_INFO_SINGLE: rc = _unpack_job_ready_msg((job_id_msg_t **) & msg->data, buffer); break; diff --git a/src/common/slurmdbd_defs.c b/src/common/slurmdbd_defs.c index 3de583ca23c9410b8ad7fb39ca1ab568541956dd..25818bcf52f3ad330d0d289162f6d68a700ec18c 100644 --- a/src/common/slurmdbd_defs.c +++ b/src/common/slurmdbd_defs.c @@ -342,6 +342,9 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) case DBD_GOT_CLUSTERS: case DBD_GOT_JOBS: case DBD_GOT_LIST: + case DBD_ADD_QOS: + case DBD_GOT_QOS: + case DBD_GOT_TXN: case DBD_GOT_USERS: case DBD_UPDATE_SHARES_USED: slurmdbd_pack_list_msg( @@ -361,10 +364,13 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) case DBD_GET_ASSOCS: case DBD_GET_CLUSTERS: case DBD_GET_JOBS_COND: + case DBD_GET_QOS: + case DBD_GET_TXN: case DBD_GET_USERS: case DBD_REMOVE_ACCOUNTS: case DBD_REMOVE_ASSOCS: case DBD_REMOVE_CLUSTERS: + case DBD_REMOVE_QOS: case DBD_REMOVE_USERS: slurmdbd_pack_cond_msg( req->msg_type, (dbd_cond_msg_t *)req->data, buffer); @@ -435,7 +441,9 @@ extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req) req->data, buffer); break; default: - error("slurmdbd: Invalid message type %u", req->msg_type); + error("slurmdbd: Invalid message type pack %u(%s)", + req->msg_type, + slurmdbd_msg_type_2_str(req->msg_type)); free_buf(buffer); return NULL; } @@ -458,6 +466,9 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) case DBD_GOT_CLUSTERS: case DBD_GOT_JOBS: case DBD_GOT_LIST: + case DBD_ADD_QOS: + case DBD_GOT_QOS: + case DBD_GOT_TXN: case DBD_GOT_USERS: case DBD_UPDATE_SHARES_USED: rc = slurmdbd_unpack_list_msg( @@ -478,9 +489,12 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) case 
DBD_GET_CLUSTERS: case DBD_GET_JOBS_COND: case DBD_GET_USERS: + case DBD_GET_QOS: + case DBD_GET_TXN: case DBD_REMOVE_ACCOUNTS: case DBD_REMOVE_ASSOCS: case DBD_REMOVE_CLUSTERS: + case DBD_REMOVE_QOS: case DBD_REMOVE_USERS: rc = slurmdbd_unpack_cond_msg( resp->msg_type, (dbd_cond_msg_t **)&resp->data, buffer); @@ -555,7 +569,9 @@ extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer) (dbd_roll_usage_msg_t **)&resp->data, buffer); break; default: - error("slurmdbd: Invalid message type %u", resp->msg_type); + error("slurmdbd: Invalid message type unpack %u(%s)", + resp->msg_type, + slurmdbd_msg_type_2_str(resp->msg_type)); return SLURM_ERROR; } return rc; @@ -564,6 +580,283 @@ unpack_error: return SLURM_ERROR; } +extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type) +{ + if(!msg_type) { + return NO_VAL; + } else if(!strcasecmp(msg_type, "Init")) { + return DBD_INIT; + } else if(!strcasecmp(msg_type, "Fini")) { + return DBD_FINI; + } else if(!strcasecmp(msg_type, "Add Accounts")) { + return DBD_ADD_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Add Account Coord")) { + return DBD_ADD_ACCOUNT_COORDS; + } else if(!strcasecmp(msg_type, "Add Associations")) { + return DBD_ADD_ASSOCS; + } else if(!strcasecmp(msg_type, "Add Clusters")) { + return DBD_ADD_CLUSTERS; + } else if(!strcasecmp(msg_type, "Add Users")) { + return DBD_ADD_USERS; + } else if(!strcasecmp(msg_type, "Cluster Processors")) { + return DBD_CLUSTER_PROCS; + } else if(!strcasecmp(msg_type, "Flush Jobs")) { + return DBD_FLUSH_JOBS; + } else if(!strcasecmp(msg_type, "Get Accounts")) { + return DBD_GET_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Get Associations")) { + return DBD_GET_ASSOCS; + } else if(!strcasecmp(msg_type, "Get Association Usage")) { + return DBD_GET_ASSOC_USAGE; + } else if(!strcasecmp(msg_type, "Get Clusters")) { + return DBD_GET_CLUSTERS; + } else if(!strcasecmp(msg_type, "Get Cluster Usage")) { + return DBD_GET_CLUSTER_USAGE; + } else if(!strcasecmp(msg_type, "Get Jobs")) { + return DBD_GET_JOBS; + } else if(!strcasecmp(msg_type, "Get Users")) { + return DBD_GET_USERS; + } else if(!strcasecmp(msg_type, "Got Accounts")) { + return DBD_GOT_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Got Associations")) { + return DBD_GOT_ASSOCS; + } else if(!strcasecmp(msg_type, "Got Association Usage")) { + return DBD_GOT_ASSOC_USAGE; + } else if(!strcasecmp(msg_type, "Got Clusters")) { + return DBD_GOT_CLUSTERS; + } else if(!strcasecmp(msg_type, "Got Cluster Usage")) { + return DBD_GOT_CLUSTER_USAGE; + } else if(!strcasecmp(msg_type, "Got Jobs")) { + return DBD_GOT_JOBS; + } else if(!strcasecmp(msg_type, "Got List")) { + return DBD_GOT_LIST; + } else if(!strcasecmp(msg_type, "Got Users")) { + return DBD_GOT_USERS; + } else if(!strcasecmp(msg_type, "Job Complete")) { + return DBD_JOB_COMPLETE; + } else if(!strcasecmp(msg_type, "Job Start")) { + return DBD_JOB_START; + } else if(!strcasecmp(msg_type, "Job Start RC")) { + return DBD_JOB_START_RC; + } else if(!strcasecmp(msg_type, "Job Suspend")) { + return DBD_JOB_SUSPEND; + } else if(!strcasecmp(msg_type, "Modify Accounts")) { + return DBD_MODIFY_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Modify Associations")) { + return DBD_MODIFY_ASSOCS; + } else if(!strcasecmp(msg_type, "Modify Clusters")) { + return DBD_MODIFY_CLUSTERS; + } else if(!strcasecmp(msg_type, "Modify Users")) { + return DBD_MODIFY_USERS; + } else if(!strcasecmp(msg_type, "Node State")) { + return DBD_NODE_STATE; + } else if(!strcasecmp(msg_type, "RC")) { + return DBD_RC; + } else 
if(!strcasecmp(msg_type, "Register Cluster")) { + return DBD_REGISTER_CTLD; + } else if(!strcasecmp(msg_type, "Remove Accounts")) { + return DBD_REMOVE_ACCOUNTS; + } else if(!strcasecmp(msg_type, "Remove Account Coords")) { + return DBD_REMOVE_ACCOUNT_COORDS; + } else if(!strcasecmp(msg_type, "Remove Associations")) { + return DBD_REMOVE_ASSOCS; + } else if(!strcasecmp(msg_type, "Remove Clusters")) { + return DBD_REMOVE_CLUSTERS; + } else if(!strcasecmp(msg_type, "Remove Users")) { + return DBD_REMOVE_USERS; + } else if(!strcasecmp(msg_type, "Roll Usage")) { + return DBD_ROLL_USAGE; + } else if(!strcasecmp(msg_type, "Step Complete")) { + return DBD_STEP_COMPLETE; + } else if(!strcasecmp(msg_type, "Step Start")) { + return DBD_STEP_START; + } else if(!strcasecmp(msg_type, "Update Shares Used")) { + return DBD_UPDATE_SHARES_USED; + } else if(!strcasecmp(msg_type, "Get Jobs Conditional")) { + return DBD_GET_JOBS_COND; + } else if(!strcasecmp(msg_type, "Get Transations")) { + return DBD_GET_TXN; + } else if(!strcasecmp(msg_type, "Got Transations")) { + return DBD_GOT_TXN; + } else if(!strcasecmp(msg_type, "Add QOS")) { + return DBD_ADD_QOS; + } else if(!strcasecmp(msg_type, "Get QOS")) { + return DBD_GET_QOS; + } else if(!strcasecmp(msg_type, "Got QOS")) { + return DBD_GOT_QOS; + } else if(!strcasecmp(msg_type, "Remove QOS")) { + return DBD_REMOVE_QOS; + } else { + return NO_VAL; + } + + return NO_VAL; +} + +extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type) +{ + switch(msg_type) { + case DBD_INIT: + return "Init"; + break; + case DBD_FINI: + return "Fini"; + break; + case DBD_ADD_ACCOUNTS: + return "Add Accounts"; + break; + case DBD_ADD_ACCOUNT_COORDS: + return "Add Account Coord"; + break; + case DBD_ADD_ASSOCS: + return "Add Associations"; + break; + case DBD_ADD_CLUSTERS: + return "Add Clusters"; + break; + case DBD_ADD_USERS: + return "Add Users"; + break; + case DBD_CLUSTER_PROCS: + return "Cluster Processors"; + break; + case DBD_FLUSH_JOBS: + return "Flush Jobs"; + break; + case DBD_GET_ACCOUNTS: + return "Get Accounts"; + break; + case DBD_GET_ASSOCS: + return "Get Associations"; + break; + case DBD_GET_ASSOC_USAGE: + return "Get Association Usage"; + break; + case DBD_GET_CLUSTERS: + return "Get Clusters"; + break; + case DBD_GET_CLUSTER_USAGE: + return "Get Cluster Usage"; + break; + case DBD_GET_JOBS: + return "Get Jobs"; + break; + case DBD_GET_USERS: + return "Get Users"; + break; + case DBD_GOT_ACCOUNTS: + return "Got Accounts"; + break; + case DBD_GOT_ASSOCS: + return "Got Associations"; + break; + case DBD_GOT_ASSOC_USAGE: + return "Got Association Usage"; + break; + case DBD_GOT_CLUSTERS: + return "Got Clusters"; + break; + case DBD_GOT_CLUSTER_USAGE: + return "Got Cluster Usage"; + break; + case DBD_GOT_JOBS: + return "Got Jobs"; + break; + case DBD_GOT_LIST: + return "Got List"; + break; + case DBD_GOT_USERS: + return "Got Users"; + break; + case DBD_JOB_COMPLETE: + return "Job Complete"; + break; + case DBD_JOB_START: + return "Job Start"; + break; + case DBD_JOB_START_RC: + return "Job Start RC"; + break; + case DBD_JOB_SUSPEND: + return "Job Suspend"; + break; + case DBD_MODIFY_ACCOUNTS: + return "Modify Accounts"; + break; + case DBD_MODIFY_ASSOCS: + return "Modify Associations"; + break; + case DBD_MODIFY_CLUSTERS: + return "Modify Clusters"; + break; + case DBD_MODIFY_USERS: + return "Modify Users"; + break; + case DBD_NODE_STATE: + return "Node State"; + break; + case DBD_RC: + return "RC"; + break; + case DBD_REGISTER_CTLD: + return "Register 
Cluster"; + break; + case DBD_REMOVE_ACCOUNTS: + return "Remove Accounts"; + break; + case DBD_REMOVE_ACCOUNT_COORDS: + return "Remove Account Coords"; + break; + case DBD_REMOVE_ASSOCS: + return "Remove Associations"; + break; + case DBD_REMOVE_CLUSTERS: + return "Remove Clusters"; + break; + case DBD_REMOVE_USERS: + return "Remove Users"; + break; + case DBD_ROLL_USAGE: + return "Roll Usage"; + break; + case DBD_STEP_COMPLETE: + return "Step Complete"; + break; + case DBD_STEP_START: + return "Step Start"; + break; + case DBD_UPDATE_SHARES_USED: + return "Update Shares Used"; + break; + case DBD_GET_JOBS_COND: + return "Get Jobs Conditional"; + break; + case DBD_GET_TXN: + return "Get Transations"; + break; + case DBD_GOT_TXN: + return "Got Transations"; + break; + case DBD_ADD_QOS: + return "Add QOS"; + break; + case DBD_GET_QOS: + return "Get QOS"; + break; + case DBD_GOT_QOS: + return "Got QOS"; + break; + case DBD_REMOVE_QOS: + return "Remove QOS"; + break; + default: + return "Unknown"; + break; + } + + return "Unknown"; +} + static int _send_init_msg(void) { int rc; @@ -724,8 +1017,10 @@ static Buf _recv_msg(void) if (msg_read != sizeof(nw_size)) return NULL; msg_size = ntohl(nw_size); - if ((msg_size < 2) || (msg_size > 1000000)) { - error("slurmdbd: Invalid msg_size (%u)"); + /* We don't error check for an upper limit here + * since size could possibly be massive */ + if (msg_size < 2) { + error("slurmdbd: Invalid msg_size (%u)", msg_size); return NULL; } @@ -1248,6 +1543,13 @@ void inline slurmdbd_free_cond_msg(slurmdbd_msg_type_t type, case DBD_GET_JOBS_COND: my_destroy = destroy_acct_job_cond; break; + case DBD_GET_QOS: + case DBD_REMOVE_QOS: + my_destroy = destroy_acct_qos_cond; + break; + case DBD_GET_TXN: + my_destroy = destroy_acct_txn_cond; + break; case DBD_GET_USERS: case DBD_REMOVE_USERS: my_destroy = destroy_acct_user_cond; @@ -1537,10 +1839,17 @@ void inline slurmdbd_pack_cond_msg(slurmdbd_msg_type_t type, case DBD_GET_JOBS_COND: my_function = pack_acct_job_cond; break; + case DBD_GET_QOS: + case DBD_REMOVE_QOS: + my_function = pack_acct_qos_cond; + break; case DBD_GET_USERS: case DBD_REMOVE_USERS: my_function = pack_acct_user_cond; break; + case DBD_GET_TXN: + my_function = pack_acct_txn_cond; + break; default: fatal("Unknown pack type"); return; @@ -1571,10 +1880,17 @@ int inline slurmdbd_unpack_cond_msg(slurmdbd_msg_type_t type, case DBD_GET_JOBS_COND: my_function = unpack_acct_job_cond; break; + case DBD_GET_QOS: + case DBD_REMOVE_QOS: + my_function = unpack_acct_qos_cond; + break; case DBD_GET_USERS: case DBD_REMOVE_USERS: my_function = unpack_acct_user_cond; break; + case DBD_GET_TXN: + my_function = unpack_acct_txn_cond; + break; default: fatal("Unknown unpack type"); return SLURM_ERROR; @@ -1926,10 +2242,17 @@ void inline slurmdbd_pack_list_msg(slurmdbd_msg_type_t type, case DBD_GOT_LIST: my_function = _slurmdbd_packstr; break; + case DBD_ADD_QOS: + case DBD_GOT_QOS: + my_function = pack_acct_qos_rec; + break; case DBD_ADD_USERS: case DBD_GOT_USERS: my_function = pack_acct_user_rec; break; + case DBD_GOT_TXN: + my_function = pack_acct_txn_rec; + break; case DBD_UPDATE_SHARES_USED: my_function = pack_update_shares_used; break; @@ -1987,11 +2310,20 @@ int inline slurmdbd_unpack_list_msg(slurmdbd_msg_type_t type, my_function = _slurmdbd_unpackstr; my_destroy = slurm_destroy_char; break; + case DBD_ADD_QOS: + case DBD_GOT_QOS: + my_function = unpack_acct_qos_rec; + my_destroy = destroy_acct_qos_rec; + break; case DBD_ADD_USERS: case DBD_GOT_USERS: my_function = 
unpack_acct_user_rec; my_destroy = destroy_acct_user_rec; break; + case DBD_GOT_TXN: + my_function = unpack_acct_txn_rec; + my_destroy = destroy_acct_txn_rec; + break; case DBD_UPDATE_SHARES_USED: my_function = unpack_update_shares_used; my_destroy = destroy_update_shares_rec; diff --git a/src/common/slurmdbd_defs.h b/src/common/slurmdbd_defs.h index c75cd084295bb7c5a8170cd9a73439f730032628..8fa02c7841b285491c6866e5973932325d05b247 100644 --- a/src/common/slurmdbd_defs.h +++ b/src/common/slurmdbd_defs.h @@ -57,10 +57,38 @@ #include "src/common/list.h" #include "src/common/slurm_accounting_storage.h" -/* Increment SLURMDBD_VERSION if any of the RPCs change */ -#define SLURMDBD_VERSION 01 +/* + * SLURMDBD_VERSION is the version of the slurmdbd protocol currently + * being used (i.e. this code). Increment this value whenever an + * RPC is added. Do not modify an existing RPC, but create a new + * msg_type for the new format (add new entries to the end of + * slurmdbd_msg_type_t so numbering of existing msg_type values + * do not change). Comment the version number when a defunct + * msg_type stops being used. For example, rather than changing + * the format of the RPC for DBD_ADD_USERS, add a DBD_ADD_USERS_V2, + * stop using DBD_ADD_USERS and add comment of this sort "Last used + * in SLURMDBD_VERSION 05". The slurmdbd must continue to support + * old RPCs for some time (until all Slurm clusters in that grid + * get upgraded to use the new set of RPCs). At that time, slurmdbd + * can have support for the old RPCs removed. + * + * SLURMDBD_VERSION_MIN is the minimum protocol version which slurmdbd + * will accept. Messages being sent to the slurmdbd from commands + * or daemons using older versions of the protocol will be + * rejected. Increment this value and discard the code processing + * that msg_type only after all systems have been upgraded. Don't + * remove entries from slurmdbd_msg_type_t or the numbering scheme + * will break (the enum value of a msg_type would change). + * + * The slurmdbd should be at least as current as any Slurm cluster + * communicating with it (e.g. it will not accept messages with a + * version higher than SLURMDBD_VERSION). + */ +#define SLURMDBD_VERSION 02 +#define SLURMDBD_VERSION_MIN 02 /* SLURM DBD message types */ +/* ANY TIME YOU ADD TO THIS LIST UPDATE THE CONVERSION FUNCTIONS! 
*/ typedef enum { DBD_INIT = 1400, /* Connection initialization */ DBD_FINI, /* Connection finalization */ @@ -108,7 +136,13 @@ typedef enum { DBD_STEP_COMPLETE, /* Record step completion */ DBD_STEP_START, /* Record step starting */ DBD_UPDATE_SHARES_USED, /* Record current share usage */ - DBD_GET_JOBS_COND /* Get job information with a condition */ + DBD_GET_JOBS_COND, /* Get job information with a condition */ + DBD_GET_TXN, /* Get transaction information */ + DBD_GOT_TXN, /* Got transaction information */ + DBD_ADD_QOS, /* Add QOS information */ + DBD_GET_QOS, /* Get QOS information */ + DBD_GOT_QOS, /* Got QOS information */ + DBD_REMOVE_QOS /* Remove QOS information */ } slurmdbd_msg_type_t; /*****************************************************************************\ @@ -319,6 +353,10 @@ extern int slurm_send_slurmdbd_recv_rc_msg(slurmdbd_msg_t *req, int *rc); extern Buf pack_slurmdbd_msg(slurmdbd_msg_t *req); extern int unpack_slurmdbd_msg(slurmdbd_msg_t *resp, Buf buffer); + +extern slurmdbd_msg_type_t str_2_slurmdbd_msg_type(char *msg_type); +extern char *slurmdbd_msg_type_2_str(slurmdbd_msg_type_t msg_type); + /*****************************************************************************\ * Free various SlurmDBD message structures \*****************************************************************************/ diff --git a/src/common/stepd_api.c b/src/common/stepd_api.c index 038748c6c1750ac25db0c838cc043fa9c71819c9..4e8e8ee97809933d9c9973b7c37e779e3a9b6bb5 100644 --- a/src/common/stepd_api.c +++ b/src/common/stepd_api.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/stepd_api.c - slurmstepd message API - * $Id: stepd_api.c 14314 2008-06-23 20:57:56Z jette $ + * $Id: stepd_api.c 14503 2008-07-14 17:27:40Z jette $ ***************************************************************************** * Copyright (C) 2005-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. 
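Because the slurmdbd_msg_type_t enum and the two new conversion functions must stay in sync (per the warning above), a round-trip check is a cheap way to catch a missed table entry. A minimal sketch, assuming the fallbacks shown in the implementations above ("Unknown" and NO_VAL); check_msg_type_names() is a hypothetical helper.

#include "src/common/log.h"
#include "src/common/slurmdbd_defs.h"

static void check_msg_type_names(void)
{
	slurmdbd_msg_type_t type = DBD_GET_QOS;
	char *name = slurmdbd_msg_type_2_str(type);	/* "Get QOS" */

	/* str_2_slurmdbd_msg_type() is the case-insensitive inverse;
	 * names it does not recognize come back as NO_VAL. */
	if (str_2_slurmdbd_msg_type(name) != type)
		error("conversion tables out of sync for %s", name);
	if (str_2_slurmdbd_msg_type("No Such RPC") != NO_VAL)
		error("unexpected match for an unknown RPC name");
}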
@@ -841,7 +841,7 @@ stepd_stat_jobacct(int fd, stat_jobacct_msg_t *sent, stat_jobacct_msg_t *resp) resp->num_tasks = tasks; return rc; rwfail: - error("an error occured %d", rc); + error("gathering job accounting: %d", rc); jobacct_gather_g_destroy(resp->jobacct); resp->jobacct = NULL; return rc; diff --git a/src/database/Makefile.am b/src/database/Makefile.am index c1538b0517ff120c6a5029c07700e93164d2d15a..31e2b7c0cc9a0858f6c4dc63e05af3d9131f6549 100644 --- a/src/database/Makefile.am +++ b/src/database/Makefile.am @@ -36,4 +36,3 @@ libslurm_pgsql_la_LDFLAGS = $(LIB_LDFLAGS) libslurm_mysql_la_CFLAGS = $(MYSQL_CFLAGS) libslurm_pgsql_la_CFLAGS = $(PGSQL_CFLAGS) - diff --git a/src/database/mysql_common.h b/src/database/mysql_common.h index 69acb54c49974fab068805149b852bfa9a70e226..c8fcb76a1d3be30edef88da36360a1b0d31cbcb8 100644 --- a/src/database/mysql_common.h +++ b/src/database/mysql_common.h @@ -56,10 +56,19 @@ #include "src/common/list.h" #include "src/common/xstring.h" -#ifdef HAVE_MYSQL +#ifndef HAVE_MYSQL +typedef void mysql_conn_t; +#else #include <mysql.h> #include <mysqld_error.h> +typedef struct { + MYSQL *db_conn; + bool rollback; + List update_list; + int conn; +} mysql_conn_t; + typedef struct { uint32_t port; char *host; diff --git a/src/database/pgsql_common.h b/src/database/pgsql_common.h index cc48e8c5fad7acbfa3ee19063278007d118ecae9..2762d57b68cb8dfae4acbfe8991dbc2bf36728b9 100644 --- a/src/database/pgsql_common.h +++ b/src/database/pgsql_common.h @@ -56,9 +56,18 @@ #include "src/slurmctld/slurmctld.h" #include "src/common/xstring.h" -#ifdef HAVE_PGSQL +#ifndef HAVE_PGSQL +typedef void pgsql_conn_t; +#else #include <libpq-fe.h> +typedef struct { + PGconn *db_conn; + bool rollback; + List update_list; + int conn; +} pgsql_conn_t; + typedef struct { uint32_t port; char *host; diff --git a/src/plugins/accounting_storage/filetxt/Makefile.am b/src/plugins/accounting_storage/filetxt/Makefile.am index 4ea567fb172c47e94491508348f2f9937b2c8a6f..b05f7f62c477f915b70336b842c78c891e920155 100644 --- a/src/plugins/accounting_storage/filetxt/Makefile.am +++ b/src/plugins/accounting_storage/filetxt/Makefile.am @@ -11,4 +11,3 @@ pkglib_LTLIBRARIES = accounting_storage_filetxt.la accounting_storage_filetxt_la_SOURCES = accounting_storage_filetxt.c \ filetxt_jobacct_process.c filetxt_jobacct_process.h accounting_storage_filetxt_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) - diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index 58b4cf5f00a20a38e7b65b8fb35c6fbbb510b2b5..22c52c03330edf32655b3884420c73a264d47931 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -180,6 +180,12 @@ extern int init ( void ) mode_t prot = 0600; struct stat statbuf; + if(slurmdbd_conf) { + fatal("The filetxt plugin should not " + "be run from the slurmdbd. 
" + "Please use a database plugin"); + } + if(first) { debug2("jobacct_init() called"); log_file = slurm_get_accounting_storage_loc(); @@ -274,6 +280,12 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, acct_user_cond_t *user_q, acct_user_rec_t *user) @@ -333,6 +345,12 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(void *db_conn, acct_user_cond_t *user_q) { @@ -357,6 +375,18 @@ extern List acct_storage_p_get_associations(void *db_conn, return NULL; } +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + return NULL; +} + extern int acct_storage_p_get_usage(void *db_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) @@ -800,10 +830,37 @@ extern int jobacct_storage_p_suspend(void *db_conn, extern List jobacct_storage_p_get_jobs(void *db_conn, List selected_steps, List selected_parts, - void *params) -{ - return filetxt_jobacct_process_get_jobs(selected_steps, selected_parts, - params); + sacct_parameters_t *params) +{ + List job_list = NULL; + acct_job_cond_t job_cond; + memset(&job_cond, 0, sizeof(acct_job_cond_t)); + + job_cond.acct_list = selected_steps; + job_cond.step_list = selected_steps; + job_cond.partition_list = selected_parts; + job_cond.cluster_list = params->opt_cluster_list; + + if (params->opt_uid >=0) { + char *temp = xstrdup_printf("%u", params->opt_uid); + job_cond.userid_list = list_create(NULL); + list_append(job_cond.userid_list, temp); + } + + if (params->opt_gid >=0) { + char *temp = xstrdup_printf("%u", params->opt_gid); + job_cond.groupid_list = list_create(NULL); + list_append(job_cond.groupid_list, temp); + } + + job_list = filetxt_jobacct_process_get_jobs(&job_cond); + + if(job_cond.userid_list) + list_destroy(job_cond.userid_list); + if(job_cond.groupid_list) + list_destroy(job_cond.groupid_list); + + return job_list; } /* @@ -814,27 +871,7 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, extern List jobacct_storage_p_get_jobs_cond(void *db_conn, acct_job_cond_t *job_cond) { - sacct_parameters_t params; - - memset(¶ms, 0, sizeof(sacct_parameters_t)); - params.opt_uid = -1; - - if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { - params.opt_cluster = list_pop(job_cond->cluster_list); - } - if(job_cond->user_list && list_count(job_cond->user_list)) { - char *user = list_pop(job_cond->user_list); - struct passwd *pw = NULL; - if ((pw=getpwnam(user))) - params.opt_uid = pw->pw_uid; - xfree(user); - } - - return filetxt_jobacct_process_get_jobs(job_cond->step_list, - job_cond->partition_list, - ¶ms); - if(params.opt_cluster) - xfree(params.opt_cluster); + return filetxt_jobacct_process_get_jobs(job_cond); } /* diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index 3711582c943823514f95976135b3aad49f9e74ea..db3ec5d2e885c73bd820bdcb6e9f276387de9928 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ 
b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -45,8 +45,7 @@ #include "src/common/xstring.h" #include "src/common/xmalloc.h" -#include "src/common/slurm_protocol_api.h" -#include "src/common/jobacct_common.h" +#include "filetxt_jobacct_process.h" #include "src/slurmctld/slurmctld.h" #include "src/slurmdbd/read_config.h" /* Map field names to positions */ @@ -275,12 +274,33 @@ static jobacct_step_rec_t *_create_jobacct_step_rec( } static jobacct_job_rec_t *_create_jobacct_job_rec( - filetxt_job_rec_t *filetxt_job) + filetxt_job_rec_t *filetxt_job, acct_job_cond_t *job_cond) { - jobacct_job_rec_t *jobacct_job = create_jobacct_job_rec(); + jobacct_job_rec_t *jobacct_job = NULL; ListIterator itr = NULL; filetxt_step_rec_t *filetxt_step = NULL; + if(!job_cond) + goto no_cond; + + if (job_cond->state_list + && list_count(job_cond->state_list)) { + char *object = NULL; + itr = list_iterator_create(job_cond->state_list); + while((object = list_next(itr))) { + if (atoi(object) == filetxt_job->status) { + list_iterator_destroy(itr); + goto foundstate; + } + } + list_iterator_destroy(itr); + return NULL; /* no match */ + } + +foundstate: + +no_cond: + jobacct_job = create_jobacct_job_rec(); jobacct_job->associd = 0; jobacct_job->account = xstrdup(filetxt_job->account); jobacct_job->blockid = xstrdup(filetxt_job->header.blockid); @@ -835,8 +855,7 @@ static void _process_start(List job_list, char *f[], int lc, } static void _process_step(List job_list, char *f[], int lc, - int show_full, int len, - sacct_parameters_t *params) + int show_full, int len) { filetxt_job_rec_t *job = NULL; @@ -844,7 +863,7 @@ static void _process_step(List job_list, char *f[], int lc, filetxt_step_rec_t *temp = NULL; _parse_line(f, (void **)&temp, len); - + job = _find_job_record(job_list, temp->header, JOB_STEP); if (temp->stepnum == -2) { @@ -854,11 +873,9 @@ static void _process_step(List job_list, char *f[], int lc, if (!job) { /* fake it for now */ job = _create_filetxt_job_rec(temp->header); job->jobname = xstrdup("(unknown)"); - if (params->opt_verbose > 1) - fprintf(stderr, - "Note: JOB_STEP record %u.%u preceded " - "JOB_START record at line %d\n", - temp->header.jobnum, temp->stepnum, lc); + debug2("Note: JOB_STEP record %u.%u preceded " + "JOB_START record at line %d\n", + temp->header.jobnum, temp->stepnum, lc); } job->show_full = show_full; @@ -945,8 +962,7 @@ static void _process_suspend(List job_list, char *f[], int lc, } static void _process_terminated(List job_list, char *f[], int lc, - int show_full, int len, - sacct_parameters_t *params) + int show_full, int len) { filetxt_job_rec_t *job = NULL; filetxt_job_rec_t *temp = NULL; @@ -956,20 +972,17 @@ static void _process_terminated(List job_list, char *f[], int lc, if (!job) { /* fake it for now */ job = _create_filetxt_job_rec(temp->header); job->jobname = xstrdup("(unknown)"); - if (params->opt_verbose > 1) - fprintf(stderr, "Note: JOB_TERMINATED record for job " - "%u preceded " - "other job records at line %d\n", - temp->header.jobnum, lc); + debug("Note: JOB_TERMINATED record for job " + "%u preceded " + "other job records at line %d\n", + temp->header.jobnum, lc); } else if (job->job_terminated_seen) { if (temp->status == JOB_NODE_FAIL) { /* multiple node failures - extra TERMINATED records */ - if (params->opt_verbose > 1) - fprintf(stderr, - "Note: Duplicate JOB_TERMINATED " - "record (nf) for job %u at " - "line %d\n", - temp->header.jobnum, lc); + debug("Note: Duplicate JOB_TERMINATED " + "record (nf) for job %u at " + "line 
%d\n", + temp->header.jobnum, lc); /* JOB_TERMINATED/NF records may be preceded * by a JOB_TERMINATED/CA record; NF is much * more interesting. @@ -999,32 +1012,40 @@ finished: _destroy_filetxt_job_rec(temp); } -extern List filetxt_jobacct_process_get_jobs(List selected_steps, - List selected_parts, - sacct_parameters_t *params) +extern List filetxt_jobacct_process_get_jobs(acct_job_cond_t *job_cond) { char line[BUFFER_SIZE]; char *f[MAX_RECORD_FIELDS+1]; /* End list with null entry and, possibly, more data than we expected */ - char *fptr; + char *fptr = NULL, *filein = NULL; int i; FILE *fd = NULL; int lc = 0; int rec_type = -1; + int job_id = 0, step_id = 0, uid = 0, gid = 0; filetxt_job_rec_t *filetxt_job = NULL; jobacct_selected_step_t *selected_step = NULL; - char *selected_part = NULL; - ListIterator itr = NULL; + char *object = NULL; + ListIterator itr = NULL, itr2 = NULL; int show_full = 0; + int fdump_flag = 0; List ret_job_list = list_create(destroy_jobacct_job_rec); List job_list = list_create(_destroy_filetxt_job_rec); - if(slurmdbd_conf) { - params->opt_filein = slurm_get_accounting_storage_loc(); + filein = slurm_get_accounting_storage_loc(); + + /* we grab the fdump only for the filetxt plug through the + FDUMP_FLAG on the job_cond->duplicates variable. We didn't + add this extra field to the structure since it only applies + to this plugin. + */ + if(job_cond) { + fdump_flag = job_cond->duplicates & FDUMP_FLAG; + job_cond->duplicates &= (~FDUMP_FLAG); } - fd = _open_log_file(params->opt_filein); + fd = _open_log_file(filein); while (fgets(line, BUFFER_SIZE, fd)) { lc++; @@ -1048,20 +1069,62 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, } rec_type = atoi(f[F_RECTYPE]); - - if (list_count(selected_steps)) { - itr = list_iterator_create(selected_steps); + job_id = atoi(f[F_JOB]); + uid = atoi(f[F_UID]); + gid = atoi(f[F_GID]); + + if(rec_type == JOB_STEP) + step_id = atoi(f[F_JOBSTEP]); + else + step_id = NO_VAL; + + if(!job_cond) { + show_full = 1; + goto no_cond; + } + + if (job_cond->userid_list + && list_count(job_cond->userid_list)) { + itr = list_iterator_create(job_cond->userid_list); + while((object = list_next(itr))) { + if (atoi(object) == uid) { + list_iterator_destroy(itr); + goto founduid; + } + } + list_iterator_destroy(itr); + continue; /* no match */ + } + founduid: + + if (job_cond->groupid_list + && list_count(job_cond->groupid_list)) { + itr = list_iterator_create(job_cond->groupid_list); + while((object = list_next(itr))) { + if (atoi(object) == gid) { + list_iterator_destroy(itr); + goto foundgid; + } + } + list_iterator_destroy(itr); + continue; /* no match */ + } + foundgid: + + if (job_cond->step_list + && list_count(job_cond->step_list)) { + itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { - if (strcmp(selected_step->job, f[F_JOB])) + if (selected_step->jobid != job_id) continue; /* job matches; does the step? 
*/ - if(selected_step->step == NULL) { + if(selected_step->stepid == NO_VAL) { show_full = 1; list_iterator_destroy(itr); goto foundjob; } else if (rec_type != JOB_STEP - || !strcmp(f[F_JOBSTEP], - selected_step->step)) { + || selected_step->stepid + == step_id) { list_iterator_destroy(itr); goto foundjob; } @@ -1073,11 +1136,11 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, } foundjob: - if (list_count(selected_parts)) { - itr = list_iterator_create(selected_parts); - while((selected_part = list_next(itr))) - if (!strcasecmp(f[F_PARTITION], - selected_part)) { + if (job_cond->partition_list + && list_count(job_cond->partition_list)) { + itr = list_iterator_create(job_cond->partition_list); + while((object = list_next(itr))) + if (!strcasecmp(f[F_PARTITION], object)) { list_iterator_destroy(itr); goto foundp; } @@ -1085,12 +1148,13 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, continue; /* no match */ } foundp: - - if (params->opt_fdump) { + if (fdump_flag) { _do_fdump(f, lc); continue; } - + + no_cond: + /* Build suitable tables with all the data */ switch(rec_type) { case JOB_START: @@ -1105,8 +1169,7 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, printf("Bad data on a Step entry\n"); _show_rec(f); } else - _process_step(job_list, f, lc, show_full, i, - params); + _process_step(job_list, f, lc, show_full, i); break; case JOB_SUSPEND: if(i < F_JOB_REQUID) { @@ -1122,36 +1185,53 @@ extern List filetxt_jobacct_process_get_jobs(List selected_steps, _show_rec(f); } else _process_terminated(job_list, f, lc, - show_full, i, params); + show_full, i); break; default: - if (params->opt_verbose > 1) - fprintf(stderr, - "Invalid record at line %d of " - "input file\n", - lc); - if (params->opt_verbose > 2) - _show_rec(f); + debug("Invalid record at line %d of input file", lc); + _show_rec(f); break; } } if (ferror(fd)) { - perror(params->opt_filein); + perror(filein); exit(1); } fclose(fd); itr = list_iterator_create(job_list); + if(!job_cond->duplicates) + itr2 = list_iterator_create(ret_job_list); while((filetxt_job = list_next(itr))) { - list_append(ret_job_list, _create_jobacct_job_rec(filetxt_job)); + jobacct_job_rec_t *jobacct_job = + _create_jobacct_job_rec(filetxt_job, job_cond); + if(jobacct_job) { + jobacct_job_rec_t *curr_job = NULL; + if(job_cond && !job_cond->duplicates) { + while((curr_job = list_next(itr2))) { + if (curr_job->jobid == + jobacct_job->jobid) { + list_delete_item(itr2); + break; + } + } + } + list_append(ret_job_list, jobacct_job); + + if(!job_cond->duplicates) + list_iterator_reset(itr2); + } } + + if(!job_cond->duplicates) + list_iterator_destroy(itr2); + list_iterator_destroy(itr); list_destroy(job_list); - if(slurmdbd_conf) { - xfree(params->opt_filein); - } + xfree(filein); + return ret_job_list; } diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h index a5ba22deff7cbd5b5ad616764082da454a84dc19..acd4a43b1c69f96d383001684ff159b432c42d62 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.h @@ -44,11 +44,10 @@ #define _HAVE_FILETXT_JOBACCT_PROCESS_H #include "src/common/jobacct_common.h" +#include "src/common/slurm_accounting_storage.h" #include "src/slurmdbd/read_config.h" -extern List filetxt_jobacct_process_get_jobs(List selected_steps, - List selected_parts, - sacct_parameters_t *params); +extern List 
filetxt_jobacct_process_get_jobs(acct_job_cond_t *job_cond); extern void filetxt_jobacct_process_archive(List selected_parts, sacct_parameters_t *params); diff --git a/src/plugins/accounting_storage/gold/accounting_storage_gold.c b/src/plugins/accounting_storage/gold/accounting_storage_gold.c index 3c5fca395af952c35a180f665c3756ba42081d5a..bd9629fd9e6a479e5e3d6a161742427bf077d005 100644 --- a/src/plugins/accounting_storage/gold/accounting_storage_gold.c +++ b/src/plugins/accounting_storage/gold/accounting_storage_gold.c @@ -455,10 +455,18 @@ static List _get_user_list_from_response(gold_response_t *gold_response) if(!strcmp(name_val->name, "Name")) { user_rec->name = xstrdup(name_val->value); - } else if(!strcmp(name_val->name, "Expedite")) { - user_rec->qos = - atoi(name_val->value)+1; - } else if(!strcmp(name_val->name, "DefaultProject")) { + } /* else if(!strcmp(name_val->name, "Expedite")) { */ +/* if(user_rec->qos_list) */ +/* continue; */ +/* user_rec->qos_list = */ +/* list_create(slurm_destroy_char); */ +/* /\*really needs to have 1 added here */ +/* but we shouldn't ever need to use */ +/* this. */ +/* *\/ */ +/* slurm_addto_char_list(user_rec->qos_list, */ +/* name_val->value); */ +/* } */else if(!strcmp(name_val->name, "DefaultProject")) { user_rec->default_acct = xstrdup(name_val->value); } else { @@ -491,10 +499,10 @@ static List _get_acct_list_from_response(gold_response_t *gold_response) itr2 = list_iterator_create(resp_entry->name_val); while((name_val = list_next(itr2))) { - if(!strcmp(name_val->name, "Expedite")) { - acct_rec->qos = - atoi(name_val->value)+1; - } else if(!strcmp(name_val->name, + /* if(!strcmp(name_val->name, "Expedite")) { */ +/* acct_rec->qos = */ +/* atoi(name_val->value)+1; */ +/* } else */ if(!strcmp(name_val->name, "Name")) { acct_rec->name = xstrdup(name_val->value); @@ -740,7 +748,7 @@ extern int acct_storage_p_add_users(void *db_conn, gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; acct_user_rec_t *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; itr = list_iterator_create(user_list); while((object = list_next(itr))) { @@ -762,12 +770,12 @@ extern int acct_storage_p_add_users(void *db_conn, gold_request_add_assignment(gold_request, "DefaultProject", object->default_acct); - if(object->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - object->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(object->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* object->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -809,7 +817,7 @@ extern int acct_storage_p_add_accts(void *db_conn, gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; acct_account_rec_t *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; itr = list_iterator_create(acct_list); while((object = list_next(itr))) { @@ -833,12 +841,12 @@ extern int acct_storage_p_add_accts(void *db_conn, object->description); gold_request_add_assignment(gold_request, "Organization", object->organization); - if(object->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - object->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(object->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* object->qos-1); */ +/* 
gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -1109,6 +1117,12 @@ extern int acct_storage_p_validate_assoc_id(void *db_conn, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(void *db_conn, acct_user_cond_t *user_q, acct_user_rec_t *user) @@ -1118,7 +1132,7 @@ extern List acct_storage_p_modify_users(void *db_conn, gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; char *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; int set = 0; if(!user_q) { @@ -1141,9 +1155,10 @@ extern List acct_storage_p_modify_users(void *db_conn, return NULL; } - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -1179,12 +1194,12 @@ extern List acct_storage_p_modify_users(void *db_conn, "DefaultProject", user->default_acct); - if(user->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - user->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(user->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* user->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -1259,9 +1274,10 @@ extern List acct_storage_p_modify_user_admin_level(void *db_conn, return NULL; } - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -1321,7 +1337,7 @@ extern List acct_storage_p_modify_accts(void *db_conn, // int rc = SLURM_SUCCESS; gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; int set = 0; char *object = NULL; @@ -1344,9 +1360,10 @@ extern List acct_storage_p_modify_accts(void *db_conn, return NULL; } - if(acct_q->acct_list && list_count(acct_q->acct_list)) { - itr = list_iterator_create(acct_q->acct_list); - if(list_count(acct_q->acct_list) > 1) + if(acct_q->assoc_cond->acct_list + && list_count(acct_q->assoc_cond->acct_list)) { + itr = list_iterator_create(acct_q->assoc_cond->acct_list); + if(list_count(acct_q->assoc_cond->acct_list) > 1) set = 2; else set = 0; @@ -1403,12 +1420,12 @@ extern List acct_storage_p_modify_accts(void *db_conn, "Organization", acct->organization); - if(acct->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - acct->qos-1); - gold_request_add_assignment(gold_request, "Expedite", - tmp_buff); - } +/* if(acct->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* acct->qos-1); */ +/* gold_request_add_assignment(gold_request, "Expedite", */ +/* tmp_buff); */ +/* } */ gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -1616,9 
+1633,10 @@ extern List acct_storage_p_remove_users(void *db_conn, return NULL; } - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -1702,9 +1720,10 @@ extern List acct_storage_p_remove_accts(void *db_conn, return NULL; } - if(acct_q->acct_list && list_count(acct_q->acct_list)) { - itr = list_iterator_create(acct_q->acct_list); - if(list_count(acct_q->acct_list) > 1) + if(acct_q->assoc_cond->acct_list + && list_count(acct_q->assoc_cond->acct_list)) { + itr = list_iterator_create(acct_q->assoc_cond->acct_list); + if(list_count(acct_q->assoc_cond->acct_list) > 1) set = 2; else set = 0; @@ -2060,6 +2079,12 @@ extern List acct_storage_p_remove_associations(void *db_conn, return NULL; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(void *db_conn, acct_user_cond_t *user_q) { @@ -2069,7 +2094,7 @@ extern List acct_storage_p_get_users(void *db_conn, ListIterator itr = NULL; char *object = NULL; int set = 0; - char tmp_buff[50]; +// char tmp_buff[50]; gold_request = create_gold_request(GOLD_OBJECT_USER, GOLD_ACTION_QUERY); @@ -2080,9 +2105,10 @@ extern List acct_storage_p_get_users(void *db_conn, if(!user_q) goto empty; - if(user_q->user_list && list_count(user_q->user_list)) { - itr = list_iterator_create(user_q->user_list); - if(list_count(user_q->user_list) > 1) + if(user_q->assoc_cond->user_list + && list_count(user_q->assoc_cond->user_list)) { + itr = list_iterator_create(user_q->assoc_cond->user_list); + if(list_count(user_q->assoc_cond->user_list) > 1) set = 2; else set = 0; @@ -2113,13 +2139,13 @@ extern List acct_storage_p_get_users(void *db_conn, list_iterator_destroy(itr); } - if(user_q->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - user_q->qos-1); - gold_request_add_condition(gold_request, "Expedite", - tmp_buff, - GOLD_OPERATOR_NONE, 0); - } +/* if(user_q->qos != ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* user_q->qos-1); */ +/* gold_request_add_condition(gold_request, "Expedite", */ +/* tmp_buff, */ +/* GOLD_OPERATOR_NONE, 0); */ +/* } */ empty: gold_request_add_condition(gold_request, "Active", @@ -2160,7 +2186,7 @@ extern List acct_storage_p_get_accts(void *db_conn, ListIterator itr = NULL; int set = 0; char *object = NULL; - char tmp_buff[50]; +// char tmp_buff[50]; gold_request = create_gold_request(GOLD_OBJECT_PROJECT, @@ -2171,9 +2197,10 @@ extern List acct_storage_p_get_accts(void *db_conn, if(!acct_q) goto empty; - if(acct_q->acct_list && list_count(acct_q->acct_list)) { - itr = list_iterator_create(acct_q->acct_list); - if(list_count(acct_q->acct_list) > 1) + if(acct_q->assoc_cond->acct_list + && list_count(acct_q->assoc_cond->acct_list)) { + itr = list_iterator_create(acct_q->assoc_cond->acct_list); + if(list_count(acct_q->assoc_cond->acct_list) > 1) set = 2; else set = 0; @@ -2221,13 +2248,13 @@ extern List acct_storage_p_get_accts(void *db_conn, list_iterator_destroy(itr); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - acct_q->qos-1); - gold_request_add_condition(gold_request, "Expedite", - tmp_buff, - GOLD_OPERATOR_NONE, 0); - } +/* if(acct_q->qos 
!= ACCT_QOS_NOTSET) { */ +/* snprintf(tmp_buff, sizeof(tmp_buff), "%u", */ +/* acct_q->qos-1); */ +/* gold_request_add_condition(gold_request, "Expedite", */ +/* tmp_buff, */ +/* GOLD_OPERATOR_NONE, 0); */ +/* } */ empty: gold_request_add_condition(gold_request, "Active", "True", @@ -2434,6 +2461,18 @@ empty: return association_list; } +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + return NULL; +} + extern int acct_storage_p_get_usage(void *db_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) @@ -3149,20 +3188,20 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, /* "id %u", */ /* account_rec.id); */ - if(account_rec.cluster) { - if(params->opt_cluster && - strcmp(params->opt_cluster, - account_rec. - cluster)) { - destroy_jobacct_job_rec( - job); - job = NULL; - break; - } - job->cluster = - xstrdup(account_rec. - cluster); - } +/* if(account_rec.cluster) { */ +/* if(params->opt_cluster && */ +/* strcmp(params->opt_cluster, */ +/* account_rec. */ +/* cluster)) { */ +/* destroy_jobacct_job_rec( */ +/* job); */ +/* job = NULL; */ +/* break; */ +/* } */ +/* job->cluster = */ +/* xstrdup(account_rec. */ +/* cluster); */ +/* } */ if(account_rec.user) { struct passwd *passwd_ptr = @@ -3215,9 +3254,9 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, job->state = atoi(name_val->value); } else if(!strcmp(name_val->name, "ExitCode")) { job->exitcode = atoi(name_val->value); - } else if(!strcmp(name_val->name, "QoS")) { - job->qos = atoi(name_val->value); - } + } /* else if(!strcmp(name_val->name, "QoS")) { */ +/* job->qos = atoi(name_val->value); */ +/* } */ } list_iterator_destroy(itr2); diff --git a/src/plugins/accounting_storage/mysql/Makefile.am b/src/plugins/accounting_storage/mysql/Makefile.am index a34ba8aa8716e77c5ab17b49ab2af51b70d95f00..c7414d3eb473beeda7b9e3cb0350d4b1914812b7 100644 --- a/src/plugins/accounting_storage/mysql/Makefile.am +++ b/src/plugins/accounting_storage/mysql/Makefile.am @@ -16,6 +16,7 @@ accounting_storage_mysql_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) accounting_storage_mysql_la_CFLAGS = $(MYSQL_CFLAGS) accounting_storage_mysql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_mysql.la $(MYSQL_LIBS) -accounting_storage_mysql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_mysql.la +force: +$(accounting_storage_mysql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/plugins/accounting_storage/mysql/Makefile.in b/src/plugins/accounting_storage/mysql/Makefile.in index 2a69a97b1393b500efd12cb0d950069ebc348c02..b3f1531ee5c8ffcb40fd5d34531f4ad3fb33ef6b 100644 --- a/src/plugins/accounting_storage/mysql/Makefile.in +++ b/src/plugins/accounting_storage/mysql/Makefile.in @@ -76,6 +76,9 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) am__DEPENDENCIES_1 = +accounting_storage_mysql_la_DEPENDENCIES = \ + $(top_builddir)/src/database/libslurm_mysql.la \ + $(am__DEPENDENCIES_1) am_accounting_storage_mysql_la_OBJECTS = \ accounting_storage_mysql_la-accounting_storage_mysql.lo \ accounting_storage_mysql_la-mysql_jobacct_process.lo \ @@ -285,9 +288,6 @@ accounting_storage_mysql_la_CFLAGS = $(MYSQL_CFLAGS) accounting_storage_mysql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_mysql.la $(MYSQL_LIBS) -accounting_storage_mysql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_mysql.la - 
all: all-am .SUFFIXES: @@ -588,6 +588,10 @@ uninstall-am: uninstall-pkglibLTLIBRARIES mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + +force: +$(accounting_storage_mysql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 1230c413cd91397dfa6b3b497268b86f209d8c8a..0e06f7ce471e68edb5807889363c925903cfee5a 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -106,6 +106,7 @@ char *cluster_month_table = "cluster_month_usage_table"; char *cluster_table = "cluster_table"; char *event_table = "cluster_event_table"; char *job_table = "job_table"; +char *qos_table = "qos_table"; char *step_table = "step_table"; char *txn_table = "txn_table"; char *user_table = "user_table"; @@ -118,8 +119,9 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, uint32_t uid, List association_list); -extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, - acct_association_cond_t *assoc_q); +extern List acct_storage_p_get_associations( + mysql_conn_t *mysql_conn, + acct_association_cond_t *assoc_cond); extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, acct_association_rec_t *acct_assoc, @@ -135,11 +137,11 @@ static int _check_connection(mysql_conn_t *mysql_conn) if(!mysql_conn) { error("We need a connection to run this"); return SLURM_ERROR; - } else if(!mysql_conn->acct_mysql_db - || mysql_db_ping(mysql_conn->acct_mysql_db) != 0) { - if(mysql_get_db_connection(&mysql_conn->acct_mysql_db, + } else if(!mysql_conn->db_conn + || mysql_db_ping(mysql_conn->db_conn) != 0) { + if(mysql_get_db_connection(&mysql_conn->db_conn, mysql_db_name, mysql_db_info) - != SLURM_SUCCESS) { + != SLURM_SUCCESS) { error("unable to re-connect to mysql database"); return SLURM_ERROR; } @@ -191,6 +193,11 @@ static int _addto_update_list(List update_list, acct_update_type_t type, update_object->objects = list_create( destroy_acct_association_rec); break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + update_object->objects = list_create( + destroy_acct_qos_rec); + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in update_object: %d", type); @@ -215,9 +222,9 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, "where cluster='%s' && acct='%s' && user='';", assoc_table, cluster, parent); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -269,8 +276,8 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, xstrfmtcat(query, "update %s set parent_acct='%s' where id = %s;", assoc_table, parent, id); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -299,9 +306,9 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, "&& acct='%s' && 
user='' order by lft;", assoc_table, lft, rgt, new_parent); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -311,7 +318,7 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, debug4("%s(%s) %s,%s is a child of %s", new_parent, row[0], row[1], row[2], id); rc = _move_account(mysql_conn, atoi(row[1]), atoi(row[2]), - cluster, row[0], old_parent); + cluster, row[0], old_parent); } mysql_free_result(result); @@ -392,13 +399,13 @@ static int _modify_common(mysql_conn_t *mysql_conn, "values (%d, %d, \"%s\", '%s', \"%s\");", txn_table, now, type, cond_char, user_name, vals); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); @@ -468,9 +475,9 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, /* "order by lft;", */ /* object, assoc_table, lft, rgt); */ xfree(object); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -557,7 +564,71 @@ static int _modify_unset_users(mysql_conn_t *mysql_conn, return SLURM_SUCCESS; } +/* this function is here to see if any of what we are trying to remove + * has jobs that are or were once running. So if we have jobs and the + * object is less than a day old we don't want to delete it only set + * the deleted flag. + */ +static bool _check_jobs_before_remove(mysql_conn_t *mysql_conn, + char *assoc_char) +{ + char *query = NULL; + bool rc = 0; + MYSQL_RES *result = NULL; + + query = xstrdup_printf("select t0.associd from %s as t0, %s as t1, " + "%s as t2 where t1.lft between " + "t2.lft and t2.rgt && (%s)" + "and t0.associd=t1.id limit 1;", + job_table, assoc_table, assoc_table, + assoc_char); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return rc; + } + xfree(query); + + if(mysql_num_rows(result)) { + debug4("We have jobs for this combo"); + rc = true; + } + + mysql_free_result(result); + return rc; +} + +static bool _check_jobs_before_remove_assoc(mysql_conn_t *mysql_conn, + char *assoc_char) +{ + char *query = NULL; + bool rc = 0; + MYSQL_RES *result = NULL; + + query = xstrdup_printf("select t1.associd from %s as t1, " + "%s as t2 where (%s)" + "and t1.associd=t2.id limit 1;", + job_table, assoc_table, + assoc_char); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return rc; + } + xfree(query); + + if(mysql_num_rows(result)) { + debug4("We have jobs for this combo"); + rc = true; + } + mysql_free_result(result); + return rc; +} /* Every option in assoc_char should have a 't1.' infront of it. 
*/ static int _remove_common(mysql_conn_t *mysql_conn, @@ -574,37 +645,108 @@ static int _remove_common(mysql_conn_t *mysql_conn, MYSQL_RES *result = NULL; MYSQL_ROW row; time_t day_old = now - DELETE_SEC_BACK; + bool has_jobs = false; + /* If we have jobs associated with this we do not want to + * really delete it for accounting purposes. This is for + * corner cases most of the time this won't matter. + */ + if(table == acct_coord_table + || table == qos_table) { + /* This doesn't apply for these tables since we are + * only looking for association type tables. + */ + } else if(table != assoc_table) { + has_jobs = _check_jobs_before_remove(mysql_conn, assoc_char); + } else { + has_jobs = _check_jobs_before_remove_assoc(mysql_conn, + name_char); + } /* we want to remove completely all that is less than a day old */ - if(table != assoc_table) { + if(!has_jobs && table != assoc_table) { query = xstrdup_printf("delete from %s where creation_time>%d " "&& (%s);", table, day_old, name_char); } - xstrfmtcat(query, - "update %s set mod_time=%d, deleted=1 " - "where deleted=0 && (%s);", - table, now, name_char); + if(table != assoc_table) + xstrfmtcat(query, + "update %s set mod_time=%d, deleted=1 " + "where deleted=0 && (%s);", + table, now, name_char); + xstrfmtcat(query, "insert into %s (timestamp, action, name, actor) " "values (%d, %d, \"%s\", '%s');", txn_table, now, type, name_char, user_name); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; } - if(table == acct_coord_table) + if(table == qos_table) { + /* remove this qos from all the users/accts that have it + */ + xstrfmtcat(query, + "update %s set mod_time=%d, %s " + "where deleted=0;", + user_table, now, assoc_char); + xstrfmtcat(query, + "update %s set mod_time=%d, %s " + "where deleted=0;", + acct_table, now, assoc_char); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + if(mysql_conn->rollback) { + mysql_db_rollback(mysql_conn->db_conn); + } + list_flush(mysql_conn->update_list); + + return SLURM_ERROR; + } + /* now get what we changed and set the update */ + xstrfmtcat(query, + "select name, qos from %s where " + "mod_time=%d and deleted=0;", + user_table, now); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + if(mysql_conn->rollback) { + mysql_db_rollback(mysql_conn->db_conn); + } + list_flush(mysql_conn->update_list); + + return SLURM_ERROR; + } + + rc = 0; + while((row = mysql_fetch_row(result))) { + acct_user_rec_t *user_rec = + xmalloc(sizeof(acct_user_rec_t)); + user_rec->name = xstrdup(row[0]); + user_rec->qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(user_rec->qos_list, row[1]); + _addto_update_list(mysql_conn->update_list, + ACCT_MODIFY_USER, + user_rec); + } + mysql_free_result(result); + + return SLURM_SUCCESS; + } else if(table == acct_coord_table) return SLURM_SUCCESS; /* mark deleted=1 or remove completely the @@ -614,7 +756,7 @@ static int _remove_common(mysql_conn_t *mysql_conn, if(!assoc_char) { error("no assoc_char"); if(mysql_conn->rollback) { - 
mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; @@ -627,16 +769,18 @@ static int _remove_common(mysql_conn_t *mysql_conn, /* assoc_table, assoc_char); */ query = xstrdup_printf("select distinct t1.id " "from %s as t1, %s as t2 " - "where %s && t1.lft between " - "t2.lft and t2.rgt;", + "where (%s) && t1.lft between " + "t2.lft and t2.rgt && t1.deleted=0 " + " && t2.deleted=0;", assoc_table, assoc_table, assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; @@ -665,20 +809,20 @@ static int _remove_common(mysql_conn_t *mysql_conn, } else loc_assoc_char = assoc_char; -/* query = xstrdup_printf( */ -/* "delete t2 from %s as t2, %s as t1 where t1.creation_time>%d && (%s);" */ -/* "delete t2 from %s as t2, %s as t1 where t1.creation_time>%d && (%s);" */ -/* "delete t2 from %s as t2, %s as t1 where t1.creation_time>%d && (%s);", */ -/* assoc_day_table, assoc_table, day_old, loc_assoc_char, */ -/* assoc_hour_table, assoc_table, day_old, loc_assoc_char, */ -/* assoc_month_table, assoc_table, day_old, loc_assoc_char); */ - query = xstrdup_printf( - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);", - assoc_day_table, day_old, loc_assoc_char, - assoc_hour_table, day_old, loc_assoc_char, - assoc_month_table, day_old, loc_assoc_char); + if(!loc_assoc_char) { + debug2("No associations with object being deleted\n"); + return rc; + } + + if(!has_jobs) + query = xstrdup_printf( + "delete from %s where creation_time>%d && (%s);" + "delete from %s where creation_time>%d && (%s);" + "delete from %s where creation_time>%d && (%s);", + assoc_day_table, day_old, loc_assoc_char, + assoc_hour_table, day_old, loc_assoc_char, + assoc_month_table, day_old, loc_assoc_char); + xstrfmtcat(query, "update %s set mod_time=%d, deleted=1 where (%s);" "update %s set mod_time=%d, deleted=1 where (%s);" @@ -687,17 +831,25 @@ static int _remove_common(mysql_conn_t *mysql_conn, assoc_hour_table, now, loc_assoc_char, assoc_month_table, now, loc_assoc_char); - debug3("%d query\n%s %d", mysql_conn->conn, query, strlen(query)); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s %d", + mysql_conn->conn, __LINE__, query, strlen(query)); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; } + /* If we have jobs that have ran don't go through the logic of + * removing the associations. Since we may want them for + * reports in the future since jobs had ran. + */ + if(has_jobs) + goto just_update; + /* remove completely all the associations for this added in the last * day, since they are most likely nothing we really wanted in * the first place. 
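The loop in the next hunk is the nested-set bookkeeping behind assoc_table: when an association added within the last day is really removed (the has_jobs case above jumps straight to just_update instead), every row to the right of the deleted subtree must have its lft/rgt values pulled back by the width of that subtree, which is why the rows are walked one at a time. A minimal stand-alone sketch of the per-association statements, assuming the conventional delete-between form that the hunk's abbreviated context implies; the helper below is illustrative only and prints the SQL instead of passing it to mysql_db_query():

#include <stdio.h>

/* Illustrative only: the statements issued for one association in the
 * nested-set model.  lft, rgt and the width column "(rgt - lft + 1)"
 * correspond to row2[0], row2[1] and row2[2] in the hunk below. */
static void nested_set_remove(const char *assoc_table, long lft, long rgt)
{
	long width = rgt - lft + 1;	/* slots freed by the removed subtree */

	/* drop the association together with any children under it
	 * (assumed delete-between form; abbreviated in the hunk's context) */
	printf("DELETE FROM %s WHERE lft BETWEEN %ld AND %ld;\n",
	       assoc_table, lft, rgt);
	/* close the gap so the remaining lft/rgt values stay contiguous */
	printf("UPDATE %s SET rgt = rgt - %ld WHERE rgt > %ld;\n",
	       assoc_table, width, rgt);
	printf("UPDATE %s SET lft = lft - %ld WHERE lft > %ld;\n",
	       assoc_table, width, rgt);
}

int main(void)
{
	nested_set_remove("assoc_table", 4, 7);
	return 0;
}

With lft=4 and rgt=7 the subtree occupies four slots, so everything past rgt shifts left by four, matching the (rgt - lft + 1) column selected below and explaining why a single batched DELETE would leave stale lft/rgt values behind.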
@@ -706,31 +858,34 @@ static int _remove_common(mysql_conn_t *mysql_conn, "creation_time>%d && (%s);", assoc_table, day_old, loc_assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return SLURM_ERROR; } xfree(query); - /* we have to do this one at a time since the lft's and rgt's - change */ while((row = mysql_fetch_row(result))) { MYSQL_RES *result2 = NULL; MYSQL_ROW row2; + /* we have to do this one at a time since the lft's and rgt's + change. If you think you need to remove this make + sure your new way can handle changing lft and rgt's + in the association. */ xstrfmtcat(query, "SELECT lft, rgt, (rgt - lft + 1) " "FROM %s WHERE id = %s;", assoc_table, row[0]); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); rc = SLURM_ERROR; break; @@ -746,6 +901,7 @@ static int _remove_common(mysql_conn_t *mysql_conn, "%s AND %s;", assoc_table, row2[0], row2[1]); + xstrfmtcat(query, "UPDATE %s SET rgt = rgt - %s WHERE " "rgt > %s;" @@ -753,12 +909,14 @@ static int _remove_common(mysql_conn_t *mysql_conn, "lft > %s;", assoc_table, row2[2], row2[1], - assoc_table, row2[2], row2[1]); - + assoc_table, row2[2], + row2[1]); + mysql_free_result(result2); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("couldn't remove assoc"); @@ -768,27 +926,36 @@ static int _remove_common(mysql_conn_t *mysql_conn, mysql_free_result(result); if(rc == SLURM_ERROR) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); return rc; } - - if(table == assoc_table) - return SLURM_SUCCESS; - - /* now update the associations themselves that are still around */ - query = xstrdup_printf("update %s as t1 set mod_time=%d, deleted=1 " - "where deleted=0 && (%s);", + +just_update: + /* now update the associations themselves that are still + * around clearing all the limits since if we add them back + * we don't want any residue from past associations lingering + * around. 
+ */ + query = xstrdup_printf("update %s as t1 set mod_time=%d, deleted=1, " + "fairshare=1, max_jobs=NULL, " + "max_nodes_per_job=NULL, " + "max_wall_duration_per_job=NULL, " + "max_cpu_secs_per_job=NULL " + "where (%s);", assoc_table, now, loc_assoc_char); - xfree(loc_assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + + if(table != assoc_table) + xfree(loc_assoc_char); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); } @@ -817,7 +984,7 @@ static int _get_account_coords(mysql_conn_t *mysql_conn, acct_coord_table, acct->name); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -838,7 +1005,7 @@ static int _get_account_coords(mysql_conn_t *mysql_conn, acct_coord_table, assoc_table, assoc_table, acct->name, acct->name); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -873,7 +1040,7 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) acct_coord_table, user->name); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -904,7 +1071,7 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) if(query) { if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -932,7 +1099,7 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) return SLURM_SUCCESS; } -static int _get_db_index(MYSQL *acct_mysql_db, +static int _get_db_index(MYSQL *db_conn, time_t submit, uint32_t jobid, uint32_t associd) { MYSQL_RES *result = NULL; @@ -942,7 +1109,7 @@ static int _get_db_index(MYSQL *acct_mysql_db, "submit=%d and jobid=%u and associd=%u", job_table, (int)submit, jobid, associd); - if(!(result = mysql_db_query_ret(acct_mysql_db, query, 0))) { + if(!(result = mysql_db_query_ret(db_conn, query, 0))) { xfree(query); return -1; } @@ -974,7 +1141,7 @@ static mysql_db_info_t *_mysql_acct_create_db_info() return db_info; } -static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) +static int _mysql_acct_check_tables(MYSQL *db_conn) { int rc = SLURM_SUCCESS; storage_field_t acct_coord_table_fields[] = { @@ -993,7 +1160,7 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { "name", "tinytext not null" }, { "description", "text not null" }, { "organization", "text not null" }, - { "qos", "smallint default 1 not null" }, + { "qos", "blob" }, { NULL, NULL} }; @@ -1096,6 +1263,16 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { NULL, NULL} }; + storage_field_t qos_table_fields[] = { + { "creation_time", "int unsigned not null" }, + { "mod_time", "int unsigned default 0 not null" }, + { "deleted", "tinyint default 0" }, + { "id", "int not null auto_increment" }, + { "name", "tinytext not null" }, + { "description", "text" }, + { NULL, NULL} + }; + storage_field_t step_table_fields[] = { { "id", "int not null" }, { "stepid", "smallint not null" }, @@ 
-1155,7 +1332,7 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) { "deleted", "tinyint default 0" }, { "name", "tinytext not null" }, { "default_acct", "tinytext not null" }, - { "qos", "smallint default 1 not null" }, + { "qos", "blob" }, { "admin_level", "smallint default 1 not null" }, { NULL, NULL} }; @@ -1204,102 +1381,120 @@ static int _mysql_acct_check_tables(MYSQL *acct_mysql_db) "&& @mcpj != -1) || @my_acct = '' END REPEAT; " "END;"; - if(mysql_db_create_table(acct_mysql_db, acct_coord_table, + if(mysql_db_create_table(db_conn, acct_coord_table, acct_coord_table_fields, ", primary key (acct(20), user(20)))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, acct_table, acct_table_fields, + if(mysql_db_create_table(db_conn, acct_table, acct_table_fields, ", primary key (name(20)))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_day_table, + if(mysql_db_create_table(db_conn, assoc_day_table, assoc_usage_table_fields, ", primary key (id, period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_hour_table, + if(mysql_db_create_table(db_conn, assoc_hour_table, assoc_usage_table_fields, ", primary key (id, period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_month_table, + if(mysql_db_create_table(db_conn, assoc_month_table, assoc_usage_table_fields, ", primary key (id, period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, assoc_table, assoc_table_fields, + if(mysql_db_create_table(db_conn, assoc_table, assoc_table_fields, ", primary key (id), " " unique index (user(20), acct(20), " "cluster(20), partition(20)))" /* " unique index (lft), " */ -/* " unique index (rgt))" */) + /* " unique index (rgt))" */) == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_day_table, + if(mysql_db_create_table(db_conn, cluster_day_table, cluster_usage_table_fields, ", primary key (cluster(20), period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_hour_table, + if(mysql_db_create_table(db_conn, cluster_hour_table, cluster_usage_table_fields, ", primary key (cluster(20), period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_month_table, + if(mysql_db_create_table(db_conn, cluster_month_table, cluster_usage_table_fields, ", primary key (cluster(20), period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, cluster_table, + if(mysql_db_create_table(db_conn, cluster_table, cluster_table_fields, ", primary key (name(20)))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, event_table, + if(mysql_db_create_table(db_conn, event_table, event_table_fields, ", primary key (node_name(20), cluster(20), " "period_start))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, job_table, job_table_fields, + if(mysql_db_create_table(db_conn, job_table, job_table_fields, ", primary key (id), " "unique index (jobid, associd, submit))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, last_ran_table, + if(mysql_db_create_table(db_conn, last_ran_table, last_ran_table_fields, ")") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, step_table, + if(mysql_db_create_table(db_conn, qos_table, + qos_table_fields, + ", primary key (id), " + "unique 
index (name(20)))") + == SLURM_ERROR) + return SLURM_ERROR; + else { + time_t now = time(NULL); + char *query = xstrdup_printf( + "insert into %s " + "(creation_time, mod_time, name, description) " + "values (%d, %d, 'normal', 'Normal QOS default') " + "on duplicate key update deleted=0;", + qos_table, now, now); + debug3("%s", query); + mysql_db_query(db_conn, query); + xfree(query); + } + if(mysql_db_create_table(db_conn, step_table, step_table_fields, ", primary key (id, stepid))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, suspend_table, + if(mysql_db_create_table(db_conn, suspend_table, suspend_table_fields, ")") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, txn_table, txn_table_fields, + if(mysql_db_create_table(db_conn, txn_table, txn_table_fields, ", primary key (id))") == SLURM_ERROR) return SLURM_ERROR; - if(mysql_db_create_table(acct_mysql_db, user_table, user_table_fields, + if(mysql_db_create_table(db_conn, user_table, user_table_fields, ", primary key (name(20)))") == SLURM_ERROR) return SLURM_ERROR; - rc = mysql_db_query(acct_mysql_db, get_parent_proc); + rc = mysql_db_query(db_conn, get_parent_proc); return rc; } @@ -1314,7 +1509,7 @@ extern int init ( void ) static int first = 1; int rc = SLURM_SUCCESS; #ifdef HAVE_MYSQL - MYSQL *acct_mysql_db = NULL; + MYSQL *db_conn = NULL; char *location = NULL; #else fatal("No MySQL database was found on the machine. " @@ -1355,11 +1550,11 @@ extern int init ( void ) debug2("mysql_connect() called for db %s", mysql_db_name); - mysql_get_db_connection(&acct_mysql_db, mysql_db_name, mysql_db_info); + mysql_get_db_connection(&db_conn, mysql_db_name, mysql_db_info); - rc = _mysql_acct_check_tables(acct_mysql_db); + rc = _mysql_acct_check_tables(db_conn); - mysql_close_db_connection(&acct_mysql_db); + mysql_close_db_connection(&db_conn); #endif @@ -1393,11 +1588,11 @@ extern void *acct_storage_p_get_connection(bool make_agent, bool rollback) debug2("acct_storage_p_get_connection: request new connection"); - mysql_get_db_connection(&mysql_conn->acct_mysql_db, + mysql_get_db_connection(&mysql_conn->db_conn, mysql_db_name, mysql_db_info); mysql_conn->rollback = rollback; if(rollback) { - mysql_autocommit(mysql_conn->acct_mysql_db, 0); + mysql_autocommit(mysql_conn->db_conn, 0); } mysql_conn->conn = conn++; mysql_conn->update_list = list_create(destroy_acct_update_object); @@ -1415,7 +1610,7 @@ extern int acct_storage_p_close_connection(mysql_conn_t **mysql_conn) return SLURM_SUCCESS; acct_storage_p_commit((*mysql_conn), 0); - mysql_close_db_connection(&(*mysql_conn)->acct_mysql_db); + mysql_close_db_connection(&(*mysql_conn)->db_conn); list_destroy((*mysql_conn)->update_list); xfree((*mysql_conn)); @@ -1435,10 +1630,10 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) if(mysql_conn->rollback) { if(!commit) { - if(mysql_db_rollback(mysql_conn->acct_mysql_db)) + if(mysql_db_rollback(mysql_conn->db_conn)) error("rollback failed"); } else { - if(mysql_db_commit(mysql_conn->acct_mysql_db)) + if(mysql_db_commit(mysql_conn->db_conn)) error("commit failed"); } } @@ -1464,7 +1659,7 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) "where deleted=0 && control_port != 0", cluster_table); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); goto skip; } @@ -1526,6 +1721,10 @@ extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit) case 
ACCT_REMOVE_ASSOC: rc = assoc_mgr_update_local_assocs(object); break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: + rc = assoc_mgr_update_local_qos(object); + break; case ACCT_UPDATE_NOTSET: default: error("unknown type set in " @@ -1579,11 +1778,30 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(vals, "%d, %d, '%s', '%s'", now, now, object->name, object->default_acct); xstrfmtcat(extra, ", default_acct='%s'", object->default_acct); - if(object->qos != ACCT_QOS_NOTSET) { + if(object->qos_list && list_count(object->qos_list)) { + char *qos_val = NULL; + char *tmp_char = NULL; + ListIterator qos_itr = + list_iterator_create(object->qos_list); xstrcat(cols, ", qos"); - xstrfmtcat(vals, ", %u", object->qos); - xstrfmtcat(extra, ", qos=%u", object->qos); + while((tmp_char = list_next(qos_itr))) { + xstrfmtcat(qos_val, ",%s", tmp_char); + } + + xstrfmtcat(vals, ", '%s'", qos_val); + xstrfmtcat(extra, ", qos='%s'", qos_val); } + /* Since I don't really want to go find out which id + * normal is we are not going to add it at all which + * isn't a big deal since if the list is blank the user + * will get it be default + */ + /* else { */ +/* /\* Add normal qos to the user *\/ */ +/* xstrcat(cols, ", qos"); */ +/* xstrfmtcat(vals, ", ',0'"); */ +/* xstrfmtcat(extra, ", qos=',0'"); */ +/* } */ if(object->admin_level != ACCT_ADMIN_NOTSET) { xstrcat(cols, ", admin_level"); @@ -1598,7 +1816,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, xfree(cols); xfree(vals); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add user %s", object->name); @@ -1606,7 +1824,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, continue; } - affect_rows = _last_affected_rows(mysql_conn->acct_mysql_db); + affect_rows = _last_affected_rows(mysql_conn->db_conn); if(!affect_rows) { debug("nothing changed"); xfree(extra); @@ -1643,7 +1861,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); - rc = mysql_db_query(mysql_conn->acct_mysql_db, + rc = mysql_db_query(mysql_conn->db_conn, txn_query); xfree(txn_query); if(rc != SLURM_SUCCESS) { @@ -1670,7 +1888,7 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, } extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL char *query = NULL, *user = NULL, *acct = NULL; @@ -1681,7 +1899,9 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, int rc = SLURM_SUCCESS; acct_user_rec_t *user_rec = NULL; - if(!user_q || !user_q->user_list || !list_count(user_q->user_list) + if(!user_cond || !user_cond->assoc_cond + || !user_cond->assoc_cond->user_list + || !list_count(user_cond->assoc_cond->user_list) || !acct_list || !list_count(acct_list)) { error("we need something to add"); return SLURM_ERROR; @@ -1694,7 +1914,7 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, user_name = pw->pw_name; } - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); itr2 = list_iterator_create(acct_list); while((user = list_next(itr))) { while((acct = list_next(itr2))) { @@ -1734,8 +1954,9 @@ extern int acct_storage_p_add_coord(mysql_conn_t 
*mysql_conn, uint32_t uid, xstrfmtcat(query, " on duplicate key update mod_time=%d, deleted=0;%s", now, txn_query); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); xfree(txn_query); @@ -1744,7 +1965,7 @@ extern int acct_storage_p_add_coord(mysql_conn_t *mysql_conn, uint32_t uid, return rc; } /* get the update list set */ - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((user = list_next(itr))) { user_rec = xmalloc(sizeof(acct_user_rec_t)); user_rec->name = xstrdup(user); @@ -1802,10 +2023,18 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(extra, ", description='%s', organization='%s'", object->description, object->organization); - if(object->qos != ACCT_QOS_NOTSET) { + if(object->qos_list && list_count(object->qos_list)) { + char *qos_val = NULL; + char *tmp_char = NULL; + ListIterator qos_itr = + list_iterator_create(object->qos_list); xstrcat(cols, ", qos"); - xstrfmtcat(vals, ", %u", object->qos); - xstrfmtcat(extra, ", qos=%u", object->qos); + while((tmp_char = list_next(qos_itr))) { + xstrfmtcat(qos_val, ",%s", tmp_char); + } + + xstrfmtcat(vals, ", '%s'", qos_val); + xstrfmtcat(extra, ", qos='%s'", qos_val); } query = xstrdup_printf( @@ -1813,8 +2042,9 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, "on duplicate key update deleted=0, mod_time=%d %s;", acct_table, cols, vals, now, extra); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(cols); xfree(vals); xfree(query); @@ -1823,7 +2053,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, xfree(extra); continue; } - affect_rows = _last_affected_rows(mysql_conn->acct_mysql_db); + affect_rows = _last_affected_rows(mysql_conn->db_conn); /* debug3("affected %d", affect_rows); */ if(!affect_rows) { @@ -1857,7 +2087,7 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); - rc = mysql_db_query(mysql_conn->acct_mysql_db, + rc = mysql_db_query(mysql_conn->db_conn, txn_query); xfree(txn_query); if(rc != SLURM_SUCCESS) { @@ -1934,7 +2164,8 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, object->default_max_cpu_secs_per_job); xstrfmtcat(extra, ", max_cpu_secs_per_job=%u", object->default_max_cpu_secs_per_job); - } else if((int)object->default_max_cpu_secs_per_job == INFINITE) { + } else if((int)object->default_max_cpu_secs_per_job + == INFINITE) { xstrcat(cols, ", max_cpu_secs_per_job"); xstrfmtcat(vals, ", NULL"); xstrfmtcat(extra, ", max_cpu_secs_per_job=NULL"); @@ -1983,8 +2214,9 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, cluster_table, now, now, object->name, now); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add cluster %s", object->name); @@ -1995,7 +2227,7 @@ extern int acct_storage_p_add_clusters(mysql_conn_t 
*mysql_conn, uint32_t uid, break; } - affect_rows = _last_affected_rows(mysql_conn->acct_mysql_db); + affect_rows = _last_affected_rows(mysql_conn->db_conn); if(!affect_rows) { debug2("nothing changed %d", affect_rows); @@ -2020,9 +2252,10 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, xfree(cols); xfree(vals); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { @@ -2039,7 +2272,7 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, now, DBD_ADD_CLUSTERS, object->name, user, extra); xfree(extra); debug4("query\n%s",query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add txn"); @@ -2050,7 +2283,7 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, if(!added) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); } @@ -2203,13 +2436,14 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, } xstrfmtcat(query, - "select distinct %s from %s %s order by lft " + "select distinct %s from %s %s order by lft " "FOR UPDATE;", tmp_char, assoc_table, update); xfree(tmp_char); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); xfree(cols); xfree(vals); @@ -2223,6 +2457,16 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, assoc_id = 0; if(!(row = mysql_fetch_row(result))) { + /* This code speeds up the add process quite a bit + * here we are only doing an update when we are done + * adding to a specific group (cluster/account) other + * than that we are adding right behind what we were + * so just total them up and then do one update + * instead of the slow ones that require an update + * every time. There is a incr check outside of the + * loop to catch everything on the last spin of the + * while. 
+ */ if(!old_parent || !old_cluster || strcasecmp(parent, old_parent) || strcasecmp(object->cluster, old_cluster)) { @@ -2251,7 +2495,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, up_query); rc = mysql_db_query( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, up_query); xfree(up_query); if(rc != SLURM_SUCCESS) { @@ -2268,7 +2512,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, sel_query); if(!(sel_result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, sel_query, 0))) { xfree(cols); xfree(vals); @@ -2330,6 +2574,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, /* assoc_table, cols, */ /* vals); */ } else if(!atoi(row[MASSOC_DELETED])) { + /* We don't need to do anything here */ debug("This account was added already"); xfree(cols); xfree(vals); @@ -2338,6 +2583,11 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, xfree(extra); continue; } else { + /* If it was once deleted we have kept the lft + * and rgt's consant while it was deleted and + * so we can just unset the deleted flag, + * check for the parent and move if needed. + */ assoc_id = atoi(row[MASSOC_ID]); if(object->parent_acct && strcasecmp(object->parent_acct, @@ -2368,8 +2618,9 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, xfree(cols); xfree(vals); xfree(update); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add assoc"); @@ -2381,8 +2632,8 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, */ if(!assoc_id) { affect_rows = _last_affected_rows( - mysql_conn->acct_mysql_db); - assoc_id = mysql_insert_id(mysql_conn->acct_mysql_db); + mysql_conn->db_conn); + assoc_id = mysql_insert_id(mysql_conn->db_conn); //info("last id was %d", assoc_id); } @@ -2424,10 +2675,11 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, assoc_table, incr, my_left, assoc_table); - debug3("%d query\n%s", mysql_conn->conn, up_query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, up_query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, up_query); + rc = mysql_db_query(mysql_conn->db_conn, up_query); xfree(up_query); - if(rc != SLURM_SUCCESS) + if(rc != SLURM_SUCCESS) error("Couldn't do update 2"); } @@ -2436,7 +2688,7 @@ end_it: if(rc != SLURM_ERROR) { if(txn_query) { xstrcat(txn_query, ";"); - rc = mysql_db_query(mysql_conn->acct_mysql_db, + rc = mysql_db_query(mysql_conn->db_conn, txn_query); xfree(txn_query); if(rc != SLURM_SUCCESS) { @@ -2447,7 +2699,7 @@ end_it: } else { xfree(txn_query); if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); } @@ -2461,8 +2713,98 @@ end_it: #endif } +extern int acct_storage_p_add_qos(mysql_conn_t *mysql_conn, uint32_t uid, + List qos_list) +{ +#ifdef HAVE_MYSQL + ListIterator itr = NULL; + int rc = SLURM_SUCCESS; + acct_qos_rec_t *object = NULL; + char *query = NULL; + time_t now = time(NULL); + struct passwd *pw = NULL; + char *user = NULL; + int affect_rows = 0; + int added = 0; + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return SLURM_ERROR; + + if((pw=getpwuid(uid))) { + user = 
pw->pw_name; + } + + itr = list_iterator_create(qos_list); + while((object = list_next(itr))) { + if(!object->name) { + error("We need a qos name to add."); + rc = SLURM_ERROR; + continue; + } + + xstrfmtcat(query, + "insert into %s (creation_time, mod_time, " + "name, description) " + "values (%d, %d, '%s', '%s') " + "on duplicate key update deleted=0, mod_time=%d;", + qos_table, + now, now, object->name, object->description, + now); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + error("Couldn't add qos %s", object->name); + added=0; + break; + } + + affect_rows = _last_affected_rows(mysql_conn->db_conn); + + if(!affect_rows) { + debug2("nothing changed %d", affect_rows); + continue; + } + xstrfmtcat(query, + "insert into %s " + "(timestamp, action, name, actor, info) " + "values (%d, %u, '%s', '%s', \"%s\");", + txn_table, + now, DBD_ADD_QOS, object->name, user, + object->description); + + debug4("query\n%s",query); + rc = mysql_db_query(mysql_conn->db_conn, query); + xfree(query); + if(rc != SLURM_SUCCESS) { + error("Couldn't add txn"); + } else { + if(_addto_update_list(mysql_conn->update_list, + ACCT_ADD_QOS, + object) == SLURM_SUCCESS) + list_remove(itr); + added++; + } + + } + list_iterator_destroy(itr); + + if(!added) { + if(mysql_conn->rollback) { + mysql_db_rollback(mysql_conn->db_conn); + } + list_flush(mysql_conn->update_list); + } + + return rc; +#else + return SLURM_ERROR; +#endif +} + extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, - acct_user_cond_t *user_q, + acct_user_cond_t *user_cond, acct_user_rec_t *user) { #ifdef HAVE_MYSQL @@ -2477,8 +2819,9 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, int set = 0; MYSQL_RES *result = NULL; MYSQL_ROW row; + int replace_qos = 0; - if(!user_q) { + if(!user_cond) { error("we need something to change"); return NULL; } @@ -2491,10 +2834,11 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, } xstrcat(extra, "where deleted=0"); - if(user_q->user_list && list_count(user_q->user_list)) { + if(user_cond->assoc_cond && user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2505,10 +2849,10 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->def_acct_list && list_count(user_q->def_acct_list)) { + if(user_cond->def_acct_list && list_count(user_cond->def_acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->def_acct_list); + itr = list_iterator_create(user_cond->def_acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2519,19 +2863,58 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", user_q->qos); + if(user_cond->qos_list && list_count(user_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(user_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + 
list_iterator_destroy(itr); + xstrcat(extra, ")"); } - if(user_q->admin_level != ACCT_ADMIN_NOTSET) { - xstrfmtcat(extra, " && admin_level=%u", user_q->admin_level); + if(user_cond->admin_level != ACCT_ADMIN_NOTSET) { + xstrfmtcat(extra, " && admin_level=%u", user_cond->admin_level); } if(user->default_acct) xstrfmtcat(vals, ", default_acct='%s'", user->default_acct); - if(user->qos != ACCT_QOS_NOTSET) - xstrfmtcat(vals, ", qos=%u", user->qos); + if(user->qos_list && list_count(user->qos_list)) { + char *tmp_qos = NULL; + set = 0; + itr = list_iterator_create(user->qos_list); + while((object = list_next(itr))) { + /* when adding we need to make sure we don't + * already have it so we remove it and then add + * it. + */ + if(object[0] == '-') { + xstrfmtcat(vals, + ", qos=replace(qos, ',%s', '')", + object+1); + } else if(object[0] == '+') { + xstrfmtcat(vals, + ", qos=concat(" + "replace(qos, ',%s', ''), ',%s')", + object+1, object+1); + } else { + xstrfmtcat(tmp_qos, ",%s", object); + } + } + list_iterator_destroy(itr); + if(tmp_qos) { + xstrfmtcat(vals, ", qos='%s'", tmp_qos); + xfree(tmp_qos); + replace_qos = 1; + } + } if(user->admin_level != ACCT_ADMIN_NOTSET) xstrfmtcat(vals, ", admin_level=%u", user->admin_level); @@ -2541,10 +2924,11 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, error("Nothing to change"); return NULL; } - query = xstrdup_printf("select name from %s %s;", user_table, extra); + query = xstrdup_printf("select name, qos from %s %s;", + user_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -2552,6 +2936,8 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, rc = 0; ret_list = list_create(slurm_destroy_char); while((row = mysql_fetch_row(result))) { + acct_user_rec_t *user_rec = NULL; + object = xstrdup(row[0]); list_append(ret_list, object); if(!rc) { @@ -2560,6 +2946,62 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, } else { xstrfmtcat(name_char, " || name='%s'", object); } + user_rec = xmalloc(sizeof(acct_user_rec_t)); + user_rec->name = xstrdup(object); + user_rec->default_acct = xstrdup(user->default_acct); + user_rec->admin_level = user->admin_level; + if(user->qos_list) { + ListIterator new_qos_itr = + list_iterator_create(user->qos_list); + ListIterator curr_qos_itr = NULL; + char *new_qos = NULL, *curr_qos = NULL; + + user_rec->qos_list = list_create(slurm_destroy_char); + if(!replace_qos) + slurm_addto_char_list(user_rec->qos_list, + row[1]); + curr_qos_itr = list_iterator_create(user_rec->qos_list); + + while((new_qos = list_next(new_qos_itr))) { + char *tmp_char = NULL; + if(new_qos[0] == '-') { + tmp_char = xstrdup(object+1); + while((curr_qos = + list_next(curr_qos_itr))) { + if(!strcmp(curr_qos, + tmp_char)) { + list_delete_item( + curr_qos_itr); + break; + } + } + xfree(tmp_char); + list_iterator_reset(curr_qos_itr); + } else if(new_qos[0] == '+') { + tmp_char = xstrdup(object+1); + while((curr_qos = + list_next(curr_qos_itr))) { + if(!strcmp(curr_qos, + tmp_char)) { + break; + } + } + if(!curr_qos) + list_append(user_rec->qos_list, + tmp_char); + else + xfree(tmp_char); + list_iterator_reset(curr_qos_itr); + } else { + list_append(user_rec->qos_list, + xstrdup(object)); + } + } + list_iterator_destroy(curr_qos_itr); + list_iterator_destroy(new_qos_itr); + } + _addto_update_list(mysql_conn->update_list, ACCT_MODIFY_USER, + 
user_rec); } mysql_free_result(result); @@ -2592,7 +3034,7 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, extern List acct_storage_p_modify_accounts( mysql_conn_t *mysql_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct) { #ifdef HAVE_MYSQL @@ -2608,7 +3050,7 @@ extern List acct_storage_p_modify_accounts( MYSQL_RES *result = NULL; MYSQL_ROW row; - if(!acct_q) { + if(!acct_cond) { error("we need something to change"); return NULL; } @@ -2621,10 +3063,12 @@ extern List acct_storage_p_modify_accounts( } xstrcat(extra, "where deleted=0"); - if(acct_q->acct_list && list_count(acct_q->acct_list)) { + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->acct_list); + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2635,10 +3079,11 @@ extern List acct_storage_p_modify_accounts( xstrcat(extra, ")"); } - if(acct_q->description_list && list_count(acct_q->description_list)) { + if(acct_cond->description_list + && list_count(acct_cond->description_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->description_list); + itr = list_iterator_create(acct_cond->description_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2649,10 +3094,11 @@ extern List acct_storage_p_modify_accounts( xstrcat(extra, ")"); } - if(acct_q->organization_list && list_count(acct_q->organization_list)) { + if(acct_cond->organization_list + && list_count(acct_cond->organization_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->organization_list); + itr = list_iterator_create(acct_cond->organization_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2663,16 +3109,55 @@ extern List acct_storage_p_modify_accounts( xstrcat(extra, ")"); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", acct_q->qos); + if(acct_cond->qos_list && list_count(acct_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(acct_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } if(acct->description) xstrfmtcat(vals, ", description='%s'", acct->description); if(acct->organization) - xstrfmtcat(vals, ", organization='%u'", acct->organization); - if(acct->qos != ACCT_QOS_NOTSET) - xstrfmtcat(vals, ", qos='%u'", acct->qos); + xstrfmtcat(vals, ", organization='%s'", acct->organization); + + if(acct->qos_list && list_count(acct->qos_list)) { + char *tmp_qos = NULL; + set = 0; + itr = list_iterator_create(acct->qos_list); + while((object = list_next(itr))) { + /* when adding we need to make sure we don't + * already have it so we remove it and then add + * it. 
+ */ + if(object[0] == '-') { + xstrfmtcat(vals, + ", qos=replace(qos, ',%s', '')", + object+1); + } else if(object[0] == '+') { + xstrfmtcat(vals, + ", qos=concat(" + "replace(qos, ',%s', ''), ',%s')", + object+1, object+1); + } else { + xstrfmtcat(tmp_qos, ",%s", object); + } + } + list_iterator_destroy(itr); + if(tmp_qos) { + xstrfmtcat(vals, ", qos='%s'", tmp_qos); + xfree(tmp_qos); + } + } if(!extra || !vals) { errno = SLURM_NO_CHANGE_IN_DATA; @@ -2682,9 +3167,9 @@ extern List acct_storage_p_modify_accounts( query = xstrdup_printf("select name from %s %s;", acct_table, extra); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); xfree(vals); return NULL; @@ -2735,7 +3220,7 @@ extern List acct_storage_p_modify_accounts( extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, + acct_cluster_cond_t *cluster_cond, acct_cluster_rec_t *cluster) { #ifdef HAVE_MYSQL @@ -2757,7 +3242,7 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, * the controller when it loads */ - if(!cluster_q) { + if(!cluster_cond) { error("we need something to change"); return NULL; } @@ -2770,10 +3255,11 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, } xstrcat(extra, "where deleted=0"); - if(cluster_q->cluster_list && list_count(cluster_q->cluster_list)) { + if(cluster_cond->cluster_list + && list_count(cluster_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(cluster_q->cluster_list); + itr = list_iterator_create(cluster_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2800,9 +3286,9 @@ extern List acct_storage_p_modify_clusters(mysql_conn_t *mysql_conn, xstrfmtcat(query, "select name from %s %s;", cluster_table, extra); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); xfree(vals); error("no result given for %s", extra); @@ -2856,10 +3342,10 @@ end_it: #endif } -extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, - uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_p_modify_associations( + mysql_conn_t *mysql_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -2898,7 +3384,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, MASSOC_COUNT }; - if(!assoc_q) { + if(!assoc_cond) { error("we need something to change"); return NULL; } @@ -2949,10 +3435,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, user_name = pw->pw_name; } - if(assoc_q->acct_list && list_count(assoc_q->acct_list)) { + if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->acct_list); + itr = list_iterator_create(assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2963,10 +3449,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->cluster_list && 
list_count(assoc_q->cluster_list)) { + if(assoc_cond->cluster_list && list_count(assoc_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->cluster_list); + itr = list_iterator_create(assoc_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2977,10 +3463,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->user_list && list_count(assoc_q->user_list)) { + if(assoc_cond->user_list && list_count(assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->user_list); + itr = list_iterator_create(assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -2994,10 +3480,10 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, " && user = '' "); } - if(assoc_q->id_list && list_count(assoc_q->id_list)) { + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->id_list); + itr = list_iterator_create(assoc_cond->id_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3008,8 +3494,9 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->parent_acct) { - xstrfmtcat(extra, " && parent_acct='%s'", assoc_q->parent_acct); + if(assoc_cond->parent_acct) { + xstrfmtcat(extra, " && parent_acct='%s'", + assoc_cond->parent_acct); } if((int)assoc->fairshare >= 0) @@ -3059,9 +3546,9 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, xfree(object); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3100,7 +3587,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, error("We are here with no coord accts."); if(mysql_conn->rollback) { mysql_db_rollback( - mysql_conn->acct_mysql_db); + mysql_conn->db_conn); } errno = ESLURM_ACCESS_DENIED; mysql_free_result(result); @@ -3133,7 +3620,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, if(mysql_conn->rollback) { mysql_db_rollback( - mysql_conn->acct_mysql_db); + mysql_conn->db_conn); } errno = ESLURM_ACCESS_DENIED; mysql_free_result(result); @@ -3225,7 +3712,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, if(assoc->parent_acct) { if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); list_destroy(ret_list); @@ -3238,7 +3725,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, if(!list_count(ret_list)) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } errno = SLURM_NO_CHANGE_IN_DATA; debug3("didn't effect anything"); @@ -3252,7 +3739,7 @@ extern List acct_storage_p_modify_associations(mysql_conn_t *mysql_conn, user_name, assoc_table, name_char, vals) == SLURM_ERROR) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); error("Couldn't modify associations"); @@ -3273,7 +3760,7 @@ end_it: } extern List 
acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3289,8 +3776,8 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, MYSQL_RES *result = NULL; MYSQL_ROW row; - if(!user_q) { - error("we need something to change"); + if(!user_cond) { + error("we need something to remove"); return NULL; } @@ -3303,10 +3790,11 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, "where deleted=0"); - if(user_q->user_list && list_count(user_q->user_list)) { + if(user_cond->assoc_cond && user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3317,10 +3805,10 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->def_acct_list && list_count(user_q->def_acct_list)) { + if(user_cond->def_acct_list && list_count(user_cond->def_acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->def_acct_list); + itr = list_iterator_create(user_cond->def_acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3331,12 +3819,24 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(user_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", user_q->qos); + if(user_cond->qos_list && list_count(user_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(user_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } - if(user_q->admin_level != ACCT_ADMIN_NOTSET) { - xstrfmtcat(extra, " && admin_level=%u", user_q->admin_level); + if(user_cond->admin_level != ACCT_ADMIN_NOTSET) { + xstrfmtcat(extra, " && admin_level=%u", user_cond->admin_level); } if(!extra) { @@ -3347,7 +3847,7 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, query = xstrdup_printf("select name from %s %s;", user_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3356,6 +3856,8 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, ret_list = list_create(slurm_destroy_char); while((row = mysql_fetch_row(result))) { char *object = xstrdup(row[0]); + acct_user_rec_t *user_rec = NULL; + list_append(ret_list, object); if(!rc) { xstrfmtcat(name_char, "name='%s'", object); @@ -3365,6 +3867,11 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(name_char, " || name='%s'", object); xstrfmtcat(assoc_char, " || t2.user='%s'", object); } + user_rec = xmalloc(sizeof(acct_user_rec_t)); + user_rec->name = xstrdup(object); + _addto_update_list(mysql_conn->update_list, ACCT_REMOVE_USER, + user_rec); + } mysql_free_result(result); @@ -3391,7 +3898,7 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, acct_coord_table, now, assoc_char); xfree(assoc_char); - rc = 
mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't remove user coordinators"); @@ -3408,7 +3915,7 @@ extern List acct_storage_p_remove_users(mysql_conn_t *mysql_conn, uint32_t uid, extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL char *query = NULL, *object = NULL, *extra = NULL, *last_user = NULL; @@ -3424,6 +3931,11 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, MYSQL_ROW row; acct_user_rec_t user; + if(!user_cond) { + error("we need something to remove"); + return NULL; + } + if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; @@ -3470,14 +3982,17 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, user_name = pw->pw_name; } - if(user_q->user_list && list_count(user_q->user_list)) { + /* Leave it this way since we are using extra below */ + + if(user_cond->assoc_cond && user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; if(extra) xstrcat(extra, " && ("); else xstrcat(extra, " ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3505,15 +4020,23 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, list_iterator_destroy(itr); xstrcat(extra, ")"); } + + if(!extra) { + errno = SLURM_ERROR; + debug3("No conditions given"); + return NULL; + } + query = xstrdup_printf( "select user, acct from %s where deleted=0 && %s order by user", acct_coord_table, extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); xfree(extra); + errno = SLURM_ERROR; return NULL; } xfree(query); @@ -3566,6 +4089,7 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, list_destroy(ret_list); list_destroy(user_list); xfree(extra); + errno = SLURM_ERROR; return NULL; } xfree(extra); @@ -3588,7 +4112,7 @@ extern List acct_storage_p_remove_coord(mysql_conn_t *mysql_conn, uint32_t uid, } extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3604,7 +4128,7 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, MYSQL_RES *result = NULL; MYSQL_ROW row; - if(!acct_q) { + if(!acct_cond) { error("we need something to change"); return NULL; } @@ -3617,10 +4141,12 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, return NULL; xstrcat(extra, "where deleted=0"); - if(acct_q->acct_list && list_count(acct_q->acct_list)) { + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->acct_list); + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3631,10 +4157,11 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - 
if(acct_q->description_list && list_count(acct_q->description_list)) { + if(acct_cond->description_list + && list_count(acct_cond->description_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->description_list); + itr = list_iterator_create(acct_cond->description_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3645,10 +4172,11 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(acct_q->organization_list && list_count(acct_q->organization_list)) { + if(acct_cond->organization_list + && list_count(acct_cond->organization_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->organization_list); + itr = list_iterator_create(acct_cond->organization_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3659,10 +4187,22 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrcat(extra, ")"); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - xstrfmtcat(extra, " && qos=%u", acct_q->qos); - } - + if(acct_cond->qos_list && list_count(acct_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(acct_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(!extra) { error("Nothing to remove"); return NULL; @@ -3671,7 +4211,7 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, query = xstrdup_printf("select name from %s %s;", acct_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3719,7 +4259,7 @@ extern List acct_storage_p_remove_accts(mysql_conn_t *mysql_conn, uint32_t uid, extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3736,7 +4276,7 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, MYSQL_ROW row; int day_old = now - DELETE_SEC_BACK; - if(!cluster_q) { + if(!cluster_cond) { error("we need something to change"); return NULL; } @@ -3748,10 +4288,11 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, user_name = pw->pw_name; } xstrcat(extra, "where deleted=0"); - if(cluster_q->cluster_list && list_count(cluster_q->cluster_list)) { + if(cluster_cond->cluster_list + && list_count(cluster_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(cluster_q->cluster_list); + itr = list_iterator_create(cluster_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3770,7 +4311,7 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, query = xstrdup_printf("select name from %s %s;", cluster_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -3815,12 +4356,12 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, cluster_hour_table, now, assoc_char, cluster_month_table, now, assoc_char); xfree(assoc_char); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) 
query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); list_destroy(ret_list); @@ -3849,9 +4390,9 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, #endif } -extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, - uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_p_remove_associations( + mysql_conn_t *mysql_conn, uint32_t uid, + acct_association_cond_t *assoc_cond) { #ifdef HAVE_MYSQL ListIterator itr = NULL; @@ -3889,7 +4430,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, RASSOC_COUNT }; - if(!assoc_q) { + if(!assoc_cond) { error("we need something to change"); return NULL; } @@ -3942,10 +4483,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, user_name = pw->pw_name; } - if(assoc_q->acct_list && list_count(assoc_q->acct_list)) { + if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->acct_list); + itr = list_iterator_create(assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3956,10 +4497,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->cluster_list && list_count(assoc_q->cluster_list)) { + if(assoc_cond->cluster_list && list_count(assoc_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->cluster_list); + itr = list_iterator_create(assoc_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3970,10 +4511,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->user_list && list_count(assoc_q->user_list)) { + if(assoc_cond->user_list && list_count(assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->user_list); + itr = list_iterator_create(assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3984,10 +4525,10 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->id_list && list_count(assoc_q->id_list)) { + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->id_list); + itr = list_iterator_create(assoc_cond->id_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -3998,9 +4539,9 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->parent_acct) { + if(assoc_cond->parent_acct) { xstrfmtcat(extra, " && parent_acct='%s'", - assoc_q->parent_acct); + assoc_cond->parent_acct); } for(i=0; i<RASSOC_COUNT; i++) { @@ -4014,7 +4555,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, assoc_table, extra); xfree(extra); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4034,7 +4575,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, if(!name_char) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } 
list_flush(mysql_conn->update_list); errno = SLURM_NO_CHANGE_IN_DATA; @@ -4050,11 +4591,11 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, assoc_table, name_char); xfree(extra); xfree(object); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); xfree(query); @@ -4144,7 +4685,7 @@ extern List acct_storage_p_remove_associations(mysql_conn_t *mysql_conn, return ret_list; end_it: if(mysql_conn->rollback) { - mysql_db_rollback(mysql_conn->acct_mysql_db); + mysql_db_rollback(mysql_conn->db_conn); } list_flush(mysql_conn->update_list); @@ -4160,8 +4701,143 @@ end_it: #endif } +extern List acct_storage_p_remove_qos(mysql_conn_t *mysql_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ +#ifdef HAVE_MYSQL + ListIterator itr = NULL; + List ret_list = NULL; + int rc = SLURM_SUCCESS; + char *object = NULL; + char *extra = NULL, *query = NULL, + *name_char = NULL, *assoc_char = NULL; + time_t now = time(NULL); + struct passwd *pw = NULL; + char *user_name = NULL; + int set = 0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + + if(!qos_cond) { + error("we need something to change"); + return NULL; + } + + if((pw=getpwuid(uid))) { + user_name = pw->pw_name; + } + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return NULL; + + xstrcat(extra, "where deleted=0"); + if(qos_cond->description_list + && list_count(qos_cond->description_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->description_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "description='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->id_list + && list_count(qos_cond->id_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->id_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "id='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->name_list + && list_count(qos_cond->name_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->name_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "name='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(!extra) { + error("Nothing to remove"); + return NULL; + } + + query = xstrdup_printf("select id from %s %s;", qos_table, extra); + xfree(extra); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return NULL; + } + + rc = 0; + ret_list = list_create(slurm_destroy_char); + while((row = mysql_fetch_row(result))) { + char *object = xstrdup(row[0]); + acct_qos_rec_t *qos_rec = NULL; + + list_append(ret_list, object); + if(!rc) { + xstrfmtcat(name_char, "id='%s'", object); + xstrfmtcat(assoc_char, "qos=replace(qos, ',%s', '')", + object); + rc = 1; + } else { + xstrfmtcat(name_char, " || id='%s'", object); + xstrfmtcat(assoc_char, ", qos=replace(qos, ',%s', '')", + object); + } + qos_rec = xmalloc(sizeof(acct_qos_rec_t)); + qos_rec->name = xstrdup(object); + _addto_update_list(mysql_conn->update_list, ACCT_REMOVE_QOS, 
+ qos_rec); + } + mysql_free_result(result); + + if(!list_count(ret_list)) { + errno = SLURM_NO_CHANGE_IN_DATA; + debug3("didn't effect anything\n%s", query); + xfree(query); + return ret_list; + } + xfree(query); + + if(_remove_common(mysql_conn, DBD_REMOVE_ACCOUNTS, now, + user_name, qos_table, name_char, assoc_char) + == SLURM_ERROR) { + list_destroy(ret_list); + xfree(name_char); + return NULL; + } + xfree(name_char); + + return ret_list; +#else + return NULL; +#endif +} + extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4185,7 +4861,7 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, enum { USER_REQ_NAME, USER_REQ_DA, - USER_REQ_EX, + USER_REQ_QOS, USER_REQ_AL, USER_REQ_COUNT }; @@ -4195,21 +4871,23 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, - if(!user_q) { + if(!user_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(user_q->with_deleted) + if(user_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(user_q->user_list && list_count(user_q->user_list)) { + if(user_cond->assoc_cond && + user_cond->assoc_cond->user_list + && list_count(user_cond->assoc_cond->user_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4220,10 +4898,10 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(user_q->def_acct_list && list_count(user_q->def_acct_list)) { + if(user_cond->def_acct_list && list_count(user_cond->def_acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(user_q->def_acct_list); + itr = list_iterator_create(user_cond->def_acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4234,22 +4912,25 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(user_q->qos != ACCT_QOS_NOTSET) { - if(extra) - xstrfmtcat(extra, " && qos=%u", user_q->qos); - else - xstrfmtcat(extra, " where qos=%u", - user_q->qos); - + if(user_cond->qos_list && list_count(user_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(user_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } - if(user_q->admin_level != ACCT_ADMIN_NOTSET) { - if(extra) - xstrfmtcat(extra, " && admin_level=%u", - user_q->admin_level); - else - xstrfmtcat(extra, " where admin_level=%u", - user_q->admin_level); + if(user_cond->admin_level != ACCT_ADMIN_NOTSET) { + xstrfmtcat(extra, " && admin_level=%u", + user_cond->admin_level); } empty: @@ -4263,9 +4944,9 @@ empty: xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4281,7 +4962,11 @@ empty: user->name = xstrdup(row[USER_REQ_NAME]); user->default_acct = xstrdup(row[USER_REQ_DA]); user->admin_level = atoi(row[USER_REQ_AL]); - user->qos = atoi(row[USER_REQ_EX]); + if(row[USER_REQ_QOS] && row[USER_REQ_QOS][0]) { + 
user->qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(user->qos_list, + row[USER_REQ_QOS]); + } /* user id will be set on the client since this could be on a * different machine where this user may not exist or @@ -4292,26 +4977,26 @@ empty: /* user->uid = passwd_ptr->pw_uid; */ /* else */ /* user->uid = (uint32_t)NO_VAL; */ - if(user_q && user_q->with_coords) { + if(user_cond && user_cond->with_coords) { _get_user_coords(mysql_conn, user); } - if(user_q && user_q->with_assocs) { - acct_association_cond_t *assoc_q = NULL; - if(!user_q->assoc_cond) { - user_q->assoc_cond = xmalloc( + if(user_cond && user_cond->with_assocs) { + acct_association_cond_t *assoc_cond = NULL; + if(!user_cond->assoc_cond) { + user_cond->assoc_cond = xmalloc( sizeof(acct_association_cond_t)); } - assoc_q = user_q->assoc_cond; - if(assoc_q->user_list) - list_destroy(assoc_q->user_list); + assoc_cond = user_cond->assoc_cond; + if(assoc_cond->user_list) + list_destroy(assoc_cond->user_list); - assoc_q->user_list = list_create(NULL); - list_append(assoc_q->user_list, user->name); + assoc_cond->user_list = list_create(NULL); + list_append(assoc_cond->user_list, user->name); user->assoc_list = acct_storage_p_get_associations( - mysql_conn, assoc_q); - list_destroy(assoc_q->user_list); - assoc_q->user_list = NULL; + mysql_conn, assoc_cond); + list_destroy(assoc_cond->user_list); + assoc_cond->user_list = NULL; } } mysql_free_result(result); @@ -4323,7 +5008,7 @@ empty: } extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4356,20 +5041,22 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, return NULL; - if(!acct_q) { + if(!acct_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(acct_q->with_deleted) + if(acct_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(acct_q->acct_list && list_count(acct_q->acct_list)) { + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->acct_list); + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4380,10 +5067,11 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(acct_q->description_list && list_count(acct_q->description_list)) { + if(acct_cond->description_list + && list_count(acct_cond->description_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->description_list); + itr = list_iterator_create(acct_cond->description_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4394,10 +5082,11 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(acct_q->organization_list && list_count(acct_q->organization_list)) { + if(acct_cond->organization_list + && list_count(acct_cond->organization_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(acct_q->organization_list); + itr = list_iterator_create(acct_cond->organization_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4408,12 +5097,20 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(acct_q->qos != ACCT_QOS_NOTSET) { - if(extra) - xstrfmtcat(extra, " && qos=%u", acct_q->qos); - else 
- xstrfmtcat(extra, " where qos=%u", - acct_q->qos); + if(acct_cond->qos_list && list_count(acct_cond->qos_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(acct_cond->qos_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, + "(qos like '%%,%s' || qos like '%%,%s,%%')", + object, object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); } empty: @@ -4428,9 +5125,9 @@ empty: xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4445,28 +5142,32 @@ empty: acct->name = xstrdup(row[ACCT_REQ_NAME]); acct->description = xstrdup(row[ACCT_REQ_DESC]); acct->organization = xstrdup(row[ACCT_REQ_ORG]); - acct->qos = atoi(row[ACCT_REQ_QOS]); + if(row[ACCT_REQ_QOS] && row[ACCT_REQ_QOS][0]) { + acct->qos_list = list_create(slurm_destroy_char); + slurm_addto_char_list(acct->qos_list, + row[ACCT_REQ_QOS]); + } - if(acct_q && acct_q->with_coords) { + if(acct_cond && acct_cond->with_coords) { _get_account_coords(mysql_conn, acct); } - if(acct_q && acct_q->with_assocs) { - acct_association_cond_t *assoc_q = NULL; - if(!acct_q->assoc_cond) { - acct_q->assoc_cond = xmalloc( + if(acct_cond && acct_cond->with_assocs) { + acct_association_cond_t *assoc_cond = NULL; + if(!acct_cond->assoc_cond) { + acct_cond->assoc_cond = xmalloc( sizeof(acct_association_cond_t)); } - assoc_q = acct_q->assoc_cond; - if(assoc_q->acct_list) - list_destroy(assoc_q->acct_list); + assoc_cond = acct_cond->assoc_cond; + if(assoc_cond->acct_list) + list_destroy(assoc_cond->acct_list); - assoc_q->acct_list = list_create(NULL); - list_append(assoc_q->acct_list, acct->name); + assoc_cond->acct_list = list_create(NULL); + list_append(assoc_cond->acct_list, acct->name); acct->assoc_list = acct_storage_p_get_associations( - mysql_conn, assoc_q); - list_destroy(assoc_q->acct_list); - assoc_q->acct_list = NULL; + mysql_conn, assoc_cond); + list_destroy(assoc_cond->acct_list); + assoc_cond->acct_list = NULL; } } @@ -4479,7 +5180,7 @@ empty: } extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn, - acct_cluster_cond_t *cluster_q) + acct_cluster_cond_t *cluster_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4525,20 +5226,20 @@ extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn, return NULL; - if(!cluster_q) { + if(!cluster_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(cluster_q->with_deleted) + if(cluster_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(cluster_q->cluster_list && list_count(cluster_q->cluster_list)) { + if(cluster_cond->cluster_list && list_count(cluster_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(cluster_q->cluster_list); + itr = list_iterator_create(cluster_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4563,9 +5264,9 @@ empty: xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4589,10 +5290,11 @@ empty: cluster->name = xstrdup(row[CLUSTER_REQ_NAME]); /* get the usage if requested 
*/ - if(cluster_q->with_usage) { - clusteracct_storage_p_get_usage(mysql_conn, cluster, - cluster_q->usage_start, - cluster_q->usage_end); + if(cluster_cond->with_usage) { + clusteracct_storage_p_get_usage( + mysql_conn, cluster, + cluster_cond->usage_start, + cluster_cond->usage_end); } cluster->control_host = xstrdup(row[CLUSTER_REQ_CH]); @@ -4600,7 +5302,7 @@ empty: query = xstrdup_printf("select %s from %s where cluster='%s' " "&& acct='root'", tmp, assoc_table, cluster->name); - if(!(result2 = mysql_db_query_ret(mysql_conn->acct_mysql_db, + if(!(result2 = mysql_db_query_ret(mysql_conn->db_conn, query, 1))) { xfree(query); break; @@ -4608,29 +5310,29 @@ empty: xfree(query); row2 = mysql_fetch_row(result2); - if(row2[ASSOC_REQ_FS]) + if(row2 && row2[ASSOC_REQ_FS]) cluster->default_fairshare = atoi(row2[ASSOC_REQ_FS]); else cluster->default_fairshare = 1; - if(row2[ASSOC_REQ_MJ]) + if(row2 && row2[ASSOC_REQ_MJ]) cluster->default_max_jobs = atoi(row2[ASSOC_REQ_MJ]); else cluster->default_max_jobs = INFINITE; - if(row2[ASSOC_REQ_MNPJ]) + if(row2 && row2[ASSOC_REQ_MNPJ]) cluster->default_max_nodes_per_job = atoi(row2[ASSOC_REQ_MNPJ]); else cluster->default_max_nodes_per_job = INFINITE; - if(row2[ASSOC_REQ_MWPJ]) + if(row2 && row2[ASSOC_REQ_MWPJ]) cluster->default_max_wall_duration_per_job = atoi(row2[ASSOC_REQ_MWPJ]); else cluster->default_max_wall_duration_per_job = INFINITE; - if(row2[ASSOC_REQ_MCPJ]) + if(row2 && row2[ASSOC_REQ_MCPJ]) cluster->default_max_cpu_secs_per_job = atoi(row2[ASSOC_REQ_MCPJ]); else @@ -4647,7 +5349,7 @@ empty: } extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) { #ifdef HAVE_MYSQL char *query = NULL; @@ -4671,6 +5373,11 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, uint32_t user_parent_id = 0; uint32_t acct_parent_id = 0; + /* needed if we don't have an assoc_cond */ + uint16_t without_parent_info = 0; + uint16_t without_parent_limits = 0; + uint16_t with_usage = 0; + /* if this changes you will need to edit the corresponding enum */ char *assoc_req_inx[] = { "id", @@ -4715,20 +5422,20 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, return NULL; - if(!assoc_q) { + if(!assoc_cond) { xstrcat(extra, "where deleted=0"); goto empty; } - if(assoc_q->with_deleted) + if(assoc_cond->with_deleted) xstrcat(extra, "where (deleted=0 || deleted=1)"); else xstrcat(extra, "where deleted=0"); - if(assoc_q->acct_list && list_count(assoc_q->acct_list)) { + if(assoc_cond->acct_list && list_count(assoc_cond->acct_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->acct_list); + itr = list_iterator_create(assoc_cond->acct_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4739,10 +5446,10 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->cluster_list && list_count(assoc_q->cluster_list)) { + if(assoc_cond->cluster_list && list_count(assoc_cond->cluster_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->cluster_list); + itr = list_iterator_create(assoc_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4753,10 +5460,10 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->user_list && list_count(assoc_q->user_list)) { + if(assoc_cond->user_list && list_count(assoc_cond->user_list)) { set = 0; 
xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->user_list); + itr = list_iterator_create(assoc_cond->user_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); @@ -4767,10 +5474,10 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->id_list && list_count(assoc_q->id_list)) { + if(assoc_cond->id_list && list_count(assoc_cond->id_list)) { set = 0; xstrcat(extra, " && ("); - itr = list_iterator_create(assoc_q->id_list); + itr = list_iterator_create(assoc_cond->id_list); while((object = list_next(itr))) { char *ptr = NULL; long num = strtol(object, &ptr, 10); @@ -4791,9 +5498,14 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, xstrcat(extra, ")"); } - if(assoc_q->parent_acct) { - xstrfmtcat(extra, " && parent_acct='%s'", assoc_q->parent_acct); + if(assoc_cond->parent_acct) { + xstrfmtcat(extra, " && parent_acct='%s'", + assoc_cond->parent_acct); } + + with_usage = assoc_cond->with_usage; + without_parent_limits = assoc_cond->without_parent_limits; + without_parent_info = assoc_cond->without_parent_info; empty: xfree(tmp); xstrfmtcat(tmp, "%s", assoc_req_inx[i]); @@ -4805,9 +5517,9 @@ empty: tmp, assoc_table, extra); xfree(tmp); xfree(extra); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return NULL; } @@ -4828,10 +5540,10 @@ empty: assoc->rgt = atoi(row[ASSOC_REQ_RGT]); /* get the usage if requested */ - if(assoc_q->with_usage) { + if(with_usage) { acct_storage_p_get_usage(mysql_conn, assoc, - assoc_q->usage_start, - assoc_q->usage_end); + assoc_cond->usage_start, + assoc_cond->usage_end); } if(row[ASSOC_REQ_USER][0]) @@ -4839,7 +5551,8 @@ empty: assoc->acct = xstrdup(row[ASSOC_REQ_ACCT]); assoc->cluster = xstrdup(row[ASSOC_REQ_CLUSTER]); - if(!assoc_q->without_parent_info && row[ASSOC_REQ_PARENT][0]) { + if(!without_parent_info + && row[ASSOC_REQ_PARENT][0]) { /* info("got %s?=%s and %s?=%s", */ /* row[ASSOC_REQ_PARENT], last_acct_parent, */ /* row[ASSOC_REQ_CLUSTER], last_cluster); */ @@ -4852,10 +5565,11 @@ empty: "and cluster='%s';", assoc_table, row[ASSOC_REQ_PARENT], row[ASSOC_REQ_CLUSTER]); - debug4("%d query\n%s", mysql_conn->conn, query); + debug4("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, query, 1))) { xfree(query); break; @@ -4886,10 +5600,10 @@ empty: "select @par_id, @mj, @mnpj, @mwpj, @mcpj;", assoc_table, row[ASSOC_REQ_ACCT], row[ASSOC_REQ_CLUSTER], - assoc_q->without_parent_limits); + without_parent_limits); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 1))) { + mysql_conn->db_conn, query, 1))) { xfree(query); break; } @@ -4897,24 +5611,27 @@ empty: row2 = mysql_fetch_row(result2); user_parent_id = atoi(row2[ASSOC2_REQ_PARENT_ID]); - if(!assoc_q->without_parent_limits) { + if(!without_parent_limits) { if(row2[ASSOC2_REQ_MJ]) parent_mj = atoi(row2[ASSOC2_REQ_MJ]); else parent_mj = INFINITE; if(row2[ASSOC2_REQ_MNPJ]) - parent_mnpj = atoi(row2[ASSOC2_REQ_MNPJ]); + parent_mnpj = + atoi(row2[ASSOC2_REQ_MNPJ]); else parent_mwpj = INFINITE; if(row2[ASSOC2_REQ_MWPJ]) - parent_mwpj = atoi(row2[ASSOC2_REQ_MWPJ]); + parent_mwpj = + atoi(row2[ASSOC2_REQ_MWPJ]); else parent_mwpj = INFINITE; if(row2[ASSOC2_REQ_MCPJ]) - parent_mcpj = 
atoi(row2[ASSOC2_REQ_MCPJ]); + parent_mcpj = + atoi(row2[ASSOC2_REQ_MCPJ]); else parent_mcpj = INFINITE; } @@ -4957,6 +5674,302 @@ empty: #endif } +extern List acct_storage_p_get_qos(mysql_conn_t *mysql_conn, + acct_qos_cond_t *qos_cond) +{ +#ifdef HAVE_MYSQL + char *query = NULL; + char *extra = NULL; + char *tmp = NULL; + List qos_list = NULL; + ListIterator itr = NULL; + char *object = NULL; + int set = 0; + int i=0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + + /* if this changes you will need to edit the corresponding enum */ + char *qos_req_inx[] = { + "name", + "description", + "id" + }; + enum { + QOS_REQ_NAME, + QOS_REQ_DESC, + QOS_REQ_ID, + QOS_REQ_COUNT + }; + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return NULL; + + + + if(!qos_cond) { + xstrcat(extra, "where deleted=0"); + goto empty; + } + + if(qos_cond->with_deleted) + xstrcat(extra, "where (deleted=0 || deleted=1)"); + else + xstrcat(extra, "where deleted=0"); + + + if(qos_cond->description_list + && list_count(qos_cond->description_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->description_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "description='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->id_list + && list_count(qos_cond->id_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->id_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "id='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(qos_cond->name_list + && list_count(qos_cond->name_list)) { + set = 0; + xstrcat(extra, " && ("); + itr = list_iterator_create(qos_cond->name_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "name='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + +empty: + + xfree(tmp); + xstrfmtcat(tmp, "%s", qos_req_inx[i]); + for(i=1; i<QOS_REQ_COUNT; i++) { + xstrfmtcat(tmp, ", %s", qos_req_inx[i]); + } + + query = xstrdup_printf("select %s from %s %s", tmp, qos_table, extra); + xfree(tmp); + xfree(extra); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return NULL; + } + xfree(query); + + qos_list = list_create(destroy_acct_qos_rec); + + while((row = mysql_fetch_row(result))) { + acct_qos_rec_t *qos = xmalloc(sizeof(acct_qos_rec_t)); + list_append(qos_list, qos); + + qos->description = xstrdup(row[QOS_REQ_DESC]); + qos->id = atoi(row[QOS_REQ_ID]); + qos->name = xstrdup(row[QOS_REQ_NAME]); + } + mysql_free_result(result); + + return qos_list; +#else + return NULL; +#endif +} + +extern List acct_storage_p_get_txn(mysql_conn_t *mysql_conn, + acct_txn_cond_t *txn_cond) +{ +#ifdef HAVE_MYSQL + char *query = NULL; + char *extra = NULL; + char *tmp = NULL; + List txn_list = NULL; + ListIterator itr = NULL; + char *object = NULL; + int set = 0; + int i=0; + MYSQL_RES *result = NULL; + MYSQL_ROW row; + + /* if this changes you will need to edit the corresponding enum */ + char *txn_req_inx[] = { + "id", + "timestamp", + "action", + "name", + "actor", + "info" + }; + enum { + TXN_REQ_ID, + TXN_REQ_TS, + TXN_REQ_ACTION, + TXN_REQ_NAME, + TXN_REQ_ACTOR, + TXN_REQ_INFO, + TXN_REQ_COUNT + }; + + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return NULL; + + if(!txn_cond) 
+ goto empty; + + if(txn_cond->action_list && list_count(txn_cond->action_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + itr = list_iterator_create(txn_cond->action_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "action='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(txn_cond->actor_list && list_count(txn_cond->actor_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + itr = list_iterator_create(txn_cond->actor_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "actor='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(txn_cond->id_list && list_count(txn_cond->id_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + itr = list_iterator_create(txn_cond->id_list); + while((object = list_next(itr))) { + char *ptr = NULL; + long num = strtol(object, &ptr, 10); + if ((num == 0) && ptr && ptr[0]) { + error("Invalid value for txn id (%s)", + object); + xfree(extra); + list_iterator_destroy(itr); + return NULL; + } + + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "id=%s", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + + if(txn_cond->time_start && txn_cond->time_end) { + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + xstrfmtcat(extra, "timestamp < %d && timestamp >= %d)", + txn_cond->time_end, txn_cond->time_start); + } else if(txn_cond->time_start) { + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + xstrfmtcat(extra, "timestamp >= %d)", txn_cond->time_start); + + } else if(txn_cond->time_end) { + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + xstrfmtcat(extra, "timestamp < %d)", txn_cond->time_end); + } +empty: + xfree(tmp); + xstrfmtcat(tmp, "%s", txn_req_inx[i]); + for(i=1; i<TXN_REQ_COUNT; i++) { + xstrfmtcat(tmp, ", %s", txn_req_inx[i]); + } + + query = xstrdup_printf("select %s from %s", tmp, txn_table); + + if(extra) { + xstrfmtcat(query, "%s", extra); + xfree(extra); + } + xstrcat(query, " order by timestamp;"); + + xfree(tmp); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(query); + return NULL; + } + xfree(query); + + txn_list = list_create(destroy_acct_txn_rec); + + while((row = mysql_fetch_row(result))) { + acct_txn_rec_t *txn = xmalloc(sizeof(acct_txn_rec_t)); + + list_append(txn_list, txn); + + txn->action = atoi(row[TXN_REQ_ACTION]); + txn->actor_name = xstrdup(row[TXN_REQ_ACTOR]); + txn->id = atoi(row[TXN_REQ_ID]); + txn->set_info = xstrdup(row[TXN_REQ_INFO]); + txn->timestamp = atoi(row[TXN_REQ_TS]); + txn->where_query = xstrdup(row[TXN_REQ_NAME]); + } + mysql_free_result(result); + + return txn_list; +#else + return NULL; +#endif +} + extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) @@ -5066,9 +6079,9 @@ extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, tmp, my_usage_table, assoc_table, assoc_table, end, start, acct_assoc->id); xfree(tmp); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + 
mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5141,7 +6154,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, xfree(tmp); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5163,7 +6176,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, mysql_free_result(result); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5316,7 +6329,7 @@ extern int acct_storage_p_roll_usage(mysql_conn_t *mysql_conn, if(query) { debug3("%s", query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } return rc; @@ -5339,6 +6352,11 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; + if(!node_ptr) { + error("No node_ptr given!"); + return SLURM_ERROR; + } + if (slurmctld_conf.fast_schedule && !slurmdbd_conf) cpus = node_ptr->config_ptr->cpus; else @@ -5361,7 +6379,7 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn, "values ('%s', '%s', %u, %d, '%s');", event_table, node_ptr->name, cluster, cpus, event_time, my_reason); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -5385,7 +6403,7 @@ extern int clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn, "update %s set period_end=%d where cluster='%s' " "and period_end=0 and node_name='%s';", event_table, event_time, cluster, node_ptr->name); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; #else @@ -5419,7 +6437,7 @@ extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn, "and period_end=0 and node_name=''", event_table, cluster); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5443,16 +6461,16 @@ extern int clusteracct_storage_p_cluster_procs(mysql_conn_t *mysql_conn, "update %s set period_end=%d where cluster='%s' " "and period_end=0 and node_name=''", event_table, event_time, cluster); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) goto end_it; add_it: query = xstrdup_printf( - "insert into %s (cluster, cpu_count, period_start) " - "values ('%s', %u, %d)", + "insert into %s (cluster, cpu_count, period_start, reason) " + "values ('%s', %u, %d, 'Cluster processor count')", event_table, cluster, procs, event_time); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); end_it: @@ -5576,9 +6594,9 @@ extern int clusteracct_storage_p_get_usage( tmp, my_usage_table, end, start, cluster_rec->name); xfree(tmp); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -5701,14 +6719,14 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, try_again: if(!(job_ptr->db_index = mysql_insert_ret_id( - mysql_conn->acct_mysql_db, query))) { + 
mysql_conn->db_conn, query))) { if(!reinit) { error("It looks like the storage has gone " "away trying to reconnect"); mysql_close_db_connection( - &mysql_conn->acct_mysql_db); + &mysql_conn->db_conn); mysql_get_db_connection( - &mysql_conn->acct_mysql_db, + &mysql_conn->db_conn, mysql_db_name, mysql_db_info); reinit = 1; goto try_again; @@ -5726,7 +6744,7 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, job_ptr->job_state & (~JOB_COMPLETING), job_ptr->total_procs, nodes, job_ptr->account, job_ptr->db_index); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); } xfree(block_id); @@ -5771,7 +6789,7 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, nodes = "(null)"; if(!job_ptr->db_index) { - job_ptr->db_index = _get_db_index(mysql_conn->acct_mysql_db, + job_ptr->db_index = _get_db_index(mysql_conn->db_conn, job_ptr->details->submit_time, job_ptr->job_id, job_ptr->assoc_id); @@ -5788,7 +6806,7 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, job_ptr->job_state & (~JOB_COMPLETING), nodes, job_ptr->exit_code, job_ptr->requid, job_ptr->db_index); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -5858,7 +6876,7 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn, if(!step_ptr->job_ptr->db_index) { step_ptr->job_ptr->db_index = - _get_db_index(mysql_conn->acct_mysql_db, + _get_db_index(mysql_conn->db_conn, step_ptr->job_ptr->details->submit_time, step_ptr->job_ptr->job_id, step_ptr->job_ptr->assoc_id); @@ -5876,8 +6894,8 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn, step_ptr->step_id, (int)step_ptr->start_time, step_ptr->name, JOB_RUNNING, cpus, node_list, cpus, JOB_RUNNING); - debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -5965,7 +6983,7 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, if(!step_ptr->job_ptr->db_index) { step_ptr->job_ptr->db_index = - _get_db_index(mysql_conn->acct_mysql_db, + _get_db_index(mysql_conn->db_conn, step_ptr->job_ptr->details->submit_time, step_ptr->job_ptr->job_id, step_ptr->job_ptr->assoc_id); @@ -6016,7 +7034,7 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, jobacct->min_cpu_id.nodeid, /* min cpu node */ ave_cpu, /* ave cpu */ step_ptr->job_ptr->db_index, step_ptr->step_id); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); return rc; @@ -6039,7 +7057,7 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; if(!job_ptr->db_index) { - job_ptr->db_index = _get_db_index(mysql_conn->acct_mysql_db, + job_ptr->db_index = _get_db_index(mysql_conn->db_conn, job_ptr->details->submit_time, job_ptr->job_id, job_ptr->assoc_id); @@ -6067,9 +7085,9 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, "update %s set end=%d where id=%u && end=0;", suspend_table, (int)job_ptr->suspend_time, job_ptr->db_index); - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); 
xfree(query); if(rc != SLURM_ERROR) { @@ -6078,7 +7096,7 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, "state=%d where id=%u and end=0", step_table, (int)job_ptr->suspend_time, job_ptr->job_state, job_ptr->db_index); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } @@ -6101,30 +7119,34 @@ extern List jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn, List job_list = NULL; #ifdef HAVE_MYSQL acct_job_cond_t job_cond; - struct passwd *pw = NULL; if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; memset(&job_cond, 0, sizeof(acct_job_cond_t)); + job_cond.acct_list = selected_steps; job_cond.step_list = selected_steps; job_cond.partition_list = selected_parts; - if(params->opt_cluster) { - job_cond.cluster_list = list_create(NULL); - list_append(job_cond.cluster_list, params->opt_cluster); - } + job_cond.cluster_list = params->opt_cluster_list; + + if (params->opt_uid >=0) { + char *temp = xstrdup_printf("%u", params->opt_uid); + job_cond.userid_list = list_create(NULL); + list_append(job_cond.userid_list, temp); + } - if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) { - job_cond.user_list = list_create(NULL); - list_append(job_cond.user_list, pw->pw_name); + if (params->opt_gid >=0) { + char *temp = xstrdup_printf("%u", params->opt_gid); + job_cond.groupid_list = list_create(NULL); + list_append(job_cond.groupid_list, temp); } job_list = mysql_jobacct_process_get_jobs(mysql_conn, &job_cond); - if(job_cond.user_list) - list_destroy(job_cond.user_list); - if(job_cond.cluster_list) - list_destroy(job_cond.cluster_list); + if(job_cond.userid_list) + list_destroy(job_cond.userid_list); + if(job_cond.groupid_list) + list_destroy(job_cond.groupid_list); #endif return job_list; @@ -6190,11 +7212,11 @@ extern int acct_storage_p_flush_jobs_on_cluster( * the suspend table and the step table */ query = xstrdup_printf("select t1.id, t1.state from %s as t1, %s as t2 " - "where t2.id=t1.associd and t2.cluster='%s' " - "&& t1.end=0;", + "where ((t2.id=t1.associd and t2.cluster='%s') " + "|| !t1.associd) && t1.end=0;", job_table, assoc_table, cluster); if(!(result = - mysql_db_query_ret(mysql_conn->acct_mysql_db, query, 0))) { + mysql_db_query_ret(mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -6244,9 +7266,10 @@ extern int acct_storage_p_flush_jobs_on_cluster( /* job_table, assoc_table, JOB_CANCELLED, */ /* event_time, cluster); */ if(query) { - debug3("%d query\n%s", mysql_conn->conn, query); + debug3("%d(%d) query\n%s", + mysql_conn->conn, __LINE__, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); } #endif diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c index 7d669562f8a8a64bb677f25aadda8fdfc3952d54..094af22df95d8aaf0ebd0eaa5cde3d19ac243b13 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c @@ -60,7 +60,7 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, char *table_level="t2"; MYSQL_RES *result = NULL, *step_result = NULL; MYSQL_ROW row, step_row; - int i; + int i, last_id = -1, curr_id = -1; jobacct_job_rec_t *job = NULL; jobacct_step_rec_t *step = NULL; time_t now = time(NULL); @@ -237,6 +237,24 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, xstrcat(extra, 
")"); } + if(job_cond->userid_list && list_count(job_cond->userid_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + + itr = list_iterator_create(job_cond->userid_list); + while((object = list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "t1.uid='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(job_cond->groupid_list && list_count(job_cond->groupid_list)) { set = 0; if(extra) @@ -302,48 +320,45 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, job_cond->usage_end, job_cond->usage_start); } - /* we need to put all the associations (t2) stuff together here */ - if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { + if(job_cond->state_list && list_count(job_cond->state_list)) { set = 0; if(extra) xstrcat(extra, " && ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->cluster_list); + itr = list_iterator_create(job_cond->state_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.cluster='%s'", - table_level, object); + xstrfmtcat(extra, "t1.state='%s'", object); set = 1; } list_iterator_destroy(itr); - /* just incase the association is gone */ - if(set) - xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.cluster is null)", table_level); + xstrcat(extra, ")"); } - if(job_cond->user_list && list_count(job_cond->user_list)) { + /* we need to put all the associations (t2) stuff together here */ + if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { set = 0; if(extra) xstrcat(extra, " && ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->user_list); + itr = list_iterator_create(job_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.user='%s'", table_level, object); + xstrfmtcat(extra, "%s.cluster='%s'", + table_level, object); set = 1; } list_iterator_destroy(itr); /* just incase the association is gone */ if(set) xstrcat(extra, " || "); - xstrfmtcat(extra, "%s.user is null)", table_level); + xstrfmtcat(extra, "%s.cluster is null)", table_level); } no_cond: @@ -362,10 +377,15 @@ no_cond: xstrcat(query, extra); xfree(extra); } - + /* Here we want to order them this way in such a way so it is + easy to look for duplicates + */ + if(job_cond && !job_cond->duplicates) + xstrcat(query, " order by jobid, submit desc"); + debug3("%d query\n%s", mysql_conn->conn, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); list_destroy(job_list); return NULL; @@ -374,7 +394,14 @@ no_cond: while((row = mysql_fetch_row(result))) { char *id = row[JOB_REQ_ID]; + + curr_id = atoi(row[JOB_REQ_JOBID]); + + if(job_cond && !job_cond->duplicates && curr_id == last_id) + continue; + last_id = curr_id; + job = create_jobacct_job_rec(); job->alloc_cpus = atoi(row[JOB_REQ_ALLOC_CPUS]); @@ -400,7 +427,7 @@ no_cond: job->submit = atoi(row[JOB_REQ_SUBMIT]); job->start = atoi(row[JOB_REQ_START]); job->end = atoi(row[JOB_REQ_END]); - if(job_cond->usage_start) { + if(job_cond && job_cond->usage_start) { if(job->start && (job->start < job_cond->usage_start)) job->start = job_cond->usage_start; @@ -424,11 +451,11 @@ no_cond: suspend_table, job_cond->usage_end, job_cond->usage_start, - row[JOB_REQ_ID]); + id); debug4("%d query\n%s", mysql_conn->conn, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + 
mysql_conn->db_conn, query, 0))) { list_destroy(job_list); job_list = NULL; @@ -470,7 +497,7 @@ no_cond: job->elapsed -= job->suspended; } - job->jobid = atoi(row[JOB_REQ_JOBID]); + job->jobid = curr_id; job->jobname = xstrdup(row[JOB_REQ_NAME]); job->gid = atoi(row[JOB_REQ_GID]); job->exitcode = atoi(row[JOB_REQ_COMP_CODE]); @@ -534,7 +561,7 @@ no_cond: //info("query = %s", query); if(!(step_result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); list_destroy(job_list); return NULL; diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h index c04fee6c40c4efcde525895fc315669ae1bc3685..983d6db2fb5a59fa684a9f0fdccb78cfbfdc8814 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h @@ -53,16 +53,7 @@ #include "src/database/mysql_common.h" #include "src/common/slurm_accounting_storage.h" -#ifndef HAVE_MYSQL -typedef void mysql_conn_t; -#else - -typedef struct { - MYSQL *acct_mysql_db; - bool rollback; - List update_list; - int conn; -} mysql_conn_t; +#ifdef HAVE_MYSQL //extern int acct_db_init; diff --git a/src/plugins/accounting_storage/mysql/mysql_rollup.c b/src/plugins/accounting_storage/mysql/mysql_rollup.c index 63e02bbceb173fc2c47ecc501a8042d819a1e75c..4c016658950b249bdaf76f198821217ae8c7f90e 100644 --- a/src/plugins/accounting_storage/mysql/mysql_rollup.c +++ b/src/plugins/accounting_storage/mysql/mysql_rollup.c @@ -189,7 +189,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -290,7 +290,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, debug3("%d query\n%s", mysql_conn->conn, query); if(!(result = mysql_db_query_ret( - mysql_conn->acct_mysql_db, query, 0))) { + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; } @@ -344,7 +344,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, debug4("%d query\n%s", mysql_conn->conn, query); if(!(result2 = mysql_db_query_ret( - mysql_conn->acct_mysql_db, + mysql_conn->db_conn, query, 0))) { xfree(query); return SLURM_ERROR; @@ -510,7 +510,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, "over_cpu_secs=VALUES(over_cpu_secs), " "resv_cpu_secs=VALUES(resv_cpu_secs)", now); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add cluster hour rollup"); @@ -548,7 +548,7 @@ extern int mysql_hourly_rollup(mysql_conn_t *mysql_conn, now); debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add assoc hour rollup"); @@ -633,7 +633,7 @@ extern int mysql_daily_rollup(mysql_conn_t *mysql_conn, cluster_hour_table, curr_end, curr_start, now); debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); @@ -658,7 +658,7 @@ extern int mysql_daily_rollup(mysql_conn_t *mysql_conn, */ query = xstrdup_printf("delete from %s where end < %d && end 
!= 0", suspend_table, start); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't remove old suspend data"); @@ -729,7 +729,7 @@ extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, cluster_day_table, curr_end, curr_start, now); debug3("%d query\n%s", mysql_conn->conn, query); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); @@ -756,7 +756,7 @@ extern int mysql_monthly_rollup(mysql_conn_t *mysql_conn, query = xstrdup_printf("delete from %s where period_end < %d " "&& period_end != 0", event_table, start); - rc = mysql_db_query(mysql_conn->acct_mysql_db, query); + rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); if(rc != SLURM_SUCCESS) { error("Couldn't remove old event data"); diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c index 33d85ef189803cdf46701156d962bb03fc1447b2..2cd7c7df95a90ba74ad3ebad1863a322d8418cc4 100644 --- a/src/plugins/accounting_storage/none/accounting_storage_none.c +++ b/src/plugins/accounting_storage/none/accounting_storage_none.c @@ -132,6 +132,12 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, acct_user_cond_t *user_q, acct_user_rec_t *user) @@ -191,6 +197,12 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, return SLURM_SUCCESS; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(void *db_conn, acct_user_cond_t *user_q) { @@ -215,6 +227,18 @@ extern List acct_storage_p_get_associations(void *db_conn, return NULL; } +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + return NULL; +} + extern int acct_storage_p_get_usage(void *db_conn, acct_association_rec_t *acct_assoc, time_t start, time_t end) diff --git a/src/plugins/accounting_storage/pgsql/Makefile.am b/src/plugins/accounting_storage/pgsql/Makefile.am index 3c0a2833acbd9046fe8b071fb91978cd813deb48..c0c2f0637e8ee69ab1cc2728cee5e11643dc36e7 100644 --- a/src/plugins/accounting_storage/pgsql/Makefile.am +++ b/src/plugins/accounting_storage/pgsql/Makefile.am @@ -15,6 +15,7 @@ accounting_storage_pgsql_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) accounting_storage_pgsql_la_CFLAGS = $(PGSQL_CFLAGS) accounting_storage_pgsql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_pgsql.la $(PGSQL_LIBS) -accounting_storage_pgsql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_pgsql.la +force: +$(accounting_storage_pgsql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/plugins/accounting_storage/pgsql/Makefile.in b/src/plugins/accounting_storage/pgsql/Makefile.in index c7c14a43882f4936b19fdaf6efaf7331d642a00f..7bb7cda71e29428e75e873b551aadbea2aa0d2c7 100644 --- a/src/plugins/accounting_storage/pgsql/Makefile.in +++ b/src/plugins/accounting_storage/pgsql/Makefile.in @@ -76,6 +76,9 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" 
pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) am__DEPENDENCIES_1 = +accounting_storage_pgsql_la_DEPENDENCIES = \ + $(top_builddir)/src/database/libslurm_pgsql.la \ + $(am__DEPENDENCIES_1) am_accounting_storage_pgsql_la_OBJECTS = \ accounting_storage_pgsql_la-accounting_storage_pgsql.lo \ accounting_storage_pgsql_la-pgsql_jobacct_process.lo @@ -283,9 +286,6 @@ accounting_storage_pgsql_la_CFLAGS = $(PGSQL_CFLAGS) accounting_storage_pgsql_la_LIBADD = \ $(top_builddir)/src/database/libslurm_pgsql.la $(PGSQL_LIBS) -accounting_storage_pgsql_la_DEPENDENCIES = \ - $(top_builddir)/src/database/libslurm_pgsql.la - all: all-am .SUFFIXES: @@ -578,6 +578,10 @@ uninstall-am: uninstall-pkglibLTLIBRARIES mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES + +force: +$(accounting_storage_pgsql_la_LIBADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c index 70f748e66b0e62815fa6b8fd3d41d89307ae3c9d..41d5987ecf561379aef9b1dc0706a64e98f9e4db 100644 --- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c @@ -93,6 +93,7 @@ char *cluster_month_table = "cluster_month_usage_table"; char *cluster_table = "cluster_table"; char *event_table = "cluster_event_table"; char *job_table = "job_table"; +char *qos_table = "qos_table"; char *step_table = "step_table"; char *txn_table = "txn_table"; char *user_table = "user_table"; @@ -111,7 +112,7 @@ static int _get_db_index(PGconn *acct_pgsql_db, if(!(result = pgsql_db_query_ret(acct_pgsql_db, query))) { xfree(query); return -1; - } + } xfree(query); @@ -149,6 +150,8 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, char *user) { storage_field_t acct_coord_table_fields[] = { + { "creation_time", "bigint not null" }, + { "mod_time", "bigint default 0" }, { "deleted", "smallint default 0" }, { "acct", "text not null" }, { "user_name", "text not null" }, @@ -162,7 +165,7 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, { "name", "text not null" }, { "description", "text not null" }, { "organization", "text not null" }, - { "qos", "smallint default 1 not null" }, + { "qos", "text not null" }, { NULL, NULL} }; @@ -265,6 +268,16 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, { NULL, NULL} }; + storage_field_t qos_table_fields[] = { + { "creation_time", "bigint not null" }, + { "mod_time", "bigint default 0" }, + { "deleted", "smallint default 0" }, + { "id", "serial" }, + { "name", "text not null" }, + { "description", "text" }, + { NULL, NULL} + }; + storage_field_t step_table_fields[] = { { "id", "int not null" }, { "stepid", "smallint not null" }, @@ -330,7 +343,7 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, }; int i = 0, job_found = 0; - int step_found = 0, txn_found = 0, event_found = 0; + int step_found = 0, txn_found = 0, event_found = 0, qos_found = 0; int user_found = 0, acct_found = 0, acct_coord_found = 0; int cluster_found = 0, cluster_hour_found = 0, cluster_day_found = 0, cluster_month_found = 0; @@ -390,6 +403,9 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, else if(!last_ran_found && !strcmp(last_ran_table, PQgetvalue(result, i, 0))) 
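
Note: the schema changes above turn QOS into named records: the account table's qos column becomes free text and a dedicated qos_table is created with id, name and description plus the usual creation/mod/deleted bookkeeping columns. A rough sketch of the in-memory record such a row maps onto; the struct name and exact fields below are illustrative assumptions, not the actual SLURM type:

    /* Illustrative only: one QOS row as the new qos_table describes it. */
    typedef struct {
        uint32_t id;        /* "id" serial column */
        char *name;         /* unique QOS name, e.g. "normal" */
        char *description;  /* free-form "description" column */
    } qos_rec_sketch_t;
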
last_ran_found = 1; + else if(!qos_found && + !strcmp(qos_table, PQgetvalue(result, i, 0))) + qos_found = 1; else if(!step_found && !strcmp(step_table, PQgetvalue(result, i, 0))) step_found = 1; @@ -589,6 +605,20 @@ static int _pgsql_acct_check_tables(PGconn *acct_pgsql_db, return SLURM_ERROR; } + if(!qos_found) { + if(pgsql_db_create_table(acct_pgsql_db, + qos_table, qos_table_fields, + ", unique (name))") + == SLURM_ERROR) + return SLURM_ERROR; + + } else { + if(pgsql_db_make_table_current(acct_pgsql_db, + step_table, + step_table_fields)) + return SLURM_ERROR; + } + if(!step_found) { if(pgsql_db_create_table(acct_pgsql_db, step_table, step_table_fields, @@ -764,7 +794,7 @@ extern int acct_storage_p_add_users(PGconn *acct_pgsql_db, uint32_t uid, } extern int acct_storage_p_add_coord(PGconn *acct_pgsql_db, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { return SLURM_SUCCESS; } @@ -787,87 +817,111 @@ extern int acct_storage_p_add_associations(PGconn *acct_pgsql_db, uint32_t uid, return SLURM_SUCCESS; } +extern int acct_storage_p_add_qos(PGconn *acct_pgsql_db, uint32_t uid, + List qos_list) +{ + return SLURM_SUCCESS; +} + extern List acct_storage_p_modify_users(PGconn *acct_pgsql_db, uint32_t uid, - acct_user_cond_t *user_q, - acct_user_rec_t *user) + acct_user_cond_t *user_cond, + acct_user_rec_t *user) { return SLURM_SUCCESS; } extern List acct_storage_p_modify_accounts(PGconn *acct_pgsql_db, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct) { return SLURM_SUCCESS; } extern List acct_storage_p_modify_clusters(PGconn *acct_pgsql_db, uint32_t uid, - acct_cluster_cond_t *cluster_q, - acct_cluster_rec_t *cluster) + acct_cluster_cond_t *cluster_cond, + acct_cluster_rec_t *cluster) { return SLURM_SUCCESS; } -extern List acct_storage_p_modify_associations(PGconn *acct_pgsql_db, - uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_p_modify_associations( + PGconn *acct_pgsql_db, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_users(PGconn *acct_pgsql_db, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_coord(PGconn *acct_pgsql_db, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_accts(PGconn *acct_pgsql_db, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { return SLURM_SUCCESS; } extern List acct_storage_p_remove_clusters(PGconn *acct_pgsql_db, uint32_t uid, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { return SLURM_SUCCESS; } -extern List acct_storage_p_remove_associations(PGconn *acct_pgsql_db, - uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_p_remove_associations( + PGconn *acct_pgsql_db, uint32_t uid, + acct_association_cond_t *assoc_cond) { return SLURM_SUCCESS; } +extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + extern List acct_storage_p_get_users(PGconn *acct_pgsql_db, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { return NULL; } extern List acct_storage_p_get_accts(PGconn *acct_pgsql_db, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { return NULL; } extern 
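
Note: in the qos_found branch added above, the table-already-exists path passes step_table and step_table_fields to pgsql_db_make_table_current. If the intent is to bring the QOS table itself up to date, the call would presumably reference the QOS definitions instead, along the lines of this sketch (flagged as an assumption about intent, not a confirmed fix):

    /* Assumed intent when the QOS table already exists: refresh its
     * columns from qos_table_fields rather than the step-table fields. */
    if (pgsql_db_make_table_current(acct_pgsql_db,
                                    qos_table, qos_table_fields))
        return SLURM_ERROR;
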
List acct_storage_p_get_clusters(PGconn *acct_pgsql_db, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { return NULL; } extern List acct_storage_p_get_associations(PGconn *acct_pgsql_db, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + return NULL; +} + +extern List acct_storage_p_get_txn(PGconn *acct_pgsql_db, + acct_txn_cond_t *txn_cond) { return NULL; } @@ -1018,8 +1072,8 @@ extern int clusteracct_storage_p_cluster_procs(PGconn *acct_pgsql_db, goto end_it; add_it: query = xstrdup_printf( - "insert into %s (cluster, cpu_count, period_start) " - "values ('%s', %u, %d)", + "insert into %s (cluster, cpu_count, period_start, reason) " + "values ('%s', %u, %d, 'Cluster processor count')", event_table, cluster, procs, event_time); rc = pgsql_db_query(acct_pgsql_db, query); xfree(query); @@ -1516,7 +1570,6 @@ extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, List job_list = NULL; #ifdef HAVE_PGSQL acct_job_cond_t job_cond; - struct passwd *pw = NULL; if(!acct_pgsql_db || PQstatus(acct_pgsql_db) != CONNECTION_OK) { if(!pgsql_get_db_connection(&acct_pgsql_db, @@ -1526,24 +1579,30 @@ extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, memset(&job_cond, 0, sizeof(acct_job_cond_t)); + job_cond.acct_list = selected_steps; job_cond.step_list = selected_steps; job_cond.partition_list = selected_parts; - if(params->opt_cluster) { - job_cond.cluster_list = list_create(NULL); - list_append(job_cond.cluster_list, params->opt_cluster); - } + job_cond.cluster_list = params->opt_cluster_list; - if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) { - job_cond.user_list = list_create(NULL); - list_append(job_cond.user_list, pw->pw_name); + if (params->opt_uid >=0) { + char *temp = xstrdup_printf("%u", params->opt_uid); + job_cond.userid_list = list_create(NULL); + list_append(job_cond.userid_list, temp); + } + + if (params->opt_gid >=0) { + char *temp = xstrdup_printf("%u", params->opt_gid); + job_cond.groupid_list = list_create(NULL); + list_append(job_cond.groupid_list, temp); } job_list = pgsql_jobacct_process_get_jobs(acct_pgsql_db, &job_cond); - if(job_cond.user_list) - list_destroy(job_cond.user_list); - if(job_cond.cluster_list) - list_destroy(job_cond.cluster_list); + if(job_cond.userid_list) + list_destroy(job_cond.userid_list); + if(job_cond.groupid_list) + list_destroy(job_cond.groupid_list); + #endif return job_list; } diff --git a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c index 94e97377b535b0848f91bd91781554aa1dc3d9ae..a534e4d801859e64cc7261b3f026e5dac4016c4a 100644 --- a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c +++ b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c @@ -58,7 +58,7 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, int set = 0; char *table_level="t2"; PGresult *result = NULL, *step_result = NULL; - int i, j; + int i, j, last_id = -1, curr_id = -1; jobacct_job_rec_t *job = NULL; jobacct_step_rec_t *step = NULL; time_t now = time(NULL); @@ -252,6 +252,24 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, xstrcat(extra, ")"); } + if(job_cond->userid_list && list_count(job_cond->userid_list)) { + set = 0; + if(extra) + xstrcat(extra, " && ("); + else + xstrcat(extra, " where ("); + + itr = list_iterator_create(job_cond->userid_list); + while((object = 
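
Note: jobacct_storage_p_get_jobs above now filters on numeric ids: opt_uid and opt_gid are rendered as decimal strings and handed to the plugin in userid_list and groupid_list, which the query builder then splices into "t1.uid='...'" style clauses. A small caller-side sketch, assuming list_create, list_append and xstrdup_printf behave as they are used elsewhere in this patch:

    /* Building a uid filter the way the plugin now expects; the id
     * travels as a string so it can drop straight into SQL. */
    acct_job_cond_t job_cond;
    memset(&job_cond, 0, sizeof(acct_job_cond_t));
    job_cond.userid_list = list_create(NULL);
    list_append(job_cond.userid_list,
                xstrdup_printf("%u", (uint32_t) getuid()));
    /* ...run the query, then release the list... */
    list_destroy(job_cond.userid_list);
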
list_next(itr))) { + if(set) + xstrcat(extra, " || "); + xstrfmtcat(extra, "t1.uid='%s'", object); + set = 1; + } + list_iterator_destroy(itr); + xstrcat(extra, ")"); + } + if(job_cond->partition_list && list_count(job_cond->partition_list)) { set = 0; if(extra) @@ -300,41 +318,37 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, job_cond->usage_end, job_cond->usage_start); } - /* we need to put all the associations (t2) stuff together here */ - if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { + if(job_cond->state_list && list_count(job_cond->state_list)) { set = 0; if(extra) - xstrcat(extra, " and ("); + xstrcat(extra, " && ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->cluster_list); + itr = list_iterator_create(job_cond->state_list); while((object = list_next(itr))) { if(set) - xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.cluster='%s'", - table_level, object); + xstrcat(extra, " || "); + xstrfmtcat(extra, "t1.state='%s'", object); set = 1; } list_iterator_destroy(itr); - /* just incase the association is gone */ - if(set) - xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.cluster is null)", table_level); + xstrcat(extra, ")"); } - if(job_cond->user_list && list_count(job_cond->user_list)) { + /* we need to put all the associations (t2) stuff together here */ + if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { set = 0; if(extra) xstrcat(extra, " and ("); else xstrcat(extra, " where ("); - itr = list_iterator_create(job_cond->user_list); + itr = list_iterator_create(job_cond->cluster_list); while((object = list_next(itr))) { if(set) xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.user_name='%s'", + xstrfmtcat(extra, "%s.cluster='%s'", table_level, object); set = 1; } @@ -342,7 +356,7 @@ extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, /* just incase the association is gone */ if(set) xstrcat(extra, " or "); - xstrfmtcat(extra, "%s.user_name is null)", table_level); + xstrfmtcat(extra, "%s.cluster is null)", table_level); } no_cond: @@ -361,6 +375,11 @@ no_cond: xstrcat(query, extra); xfree(extra); } + /* Here we want to order them this way in such a way so it is + easy to look for duplicates + */ + if(job_cond && !job_cond->duplicates) + xstrcat(query, " order by jobid, submit desc"); debug3("query\n%s", query); if(!(result = pgsql_db_query_ret(acct_pgsql_db, query))) { @@ -373,6 +392,13 @@ no_cond: for (i = 0; i < PQntuples(result); i++) { char *id = PQgetvalue(result, i, JOB_REQ_ID); + curr_id = atoi(PQgetvalue(result, i, JOB_REQ_JOBID)); + + if(job_cond && !job_cond->duplicates && curr_id == last_id) + continue; + + last_id = curr_id; + job = create_jobacct_job_rec(); job->alloc_cpus = atoi(PQgetvalue(result, i, @@ -411,7 +437,76 @@ no_cond: } job->elapsed -= job->suspended; - job->jobid = atoi(PQgetvalue(result, i, JOB_REQ_JOBID)); + if(job_cond && job_cond->usage_start) { + if(job->start && (job->start < job_cond->usage_start)) + job->start = job_cond->usage_start; + + if(!job->start && job->end) + job->start = job->end; + + if(!job->end || job->end > job_cond->usage_end) + job->end = job_cond->usage_end; + + job->elapsed = job->end - job->start; + + if(atoi(PQgetvalue(result, i, JOB_REQ_SUSPENDED))) { + PGresult *result2 = NULL; + int i2=0; + /* get the suspended time for this job */ + query = xstrdup_printf( + "select start, end from %s where " + "(start < %d && (end >= %d " + "|| end = 0)) && id=%s " + "order by start", + suspend_table, + job_cond->usage_end, + 
job_cond->usage_start, + id); + + debug4("query\n%s", query); + if(!(result2 = pgsql_db_query_ret( + acct_pgsql_db, query))) { + list_destroy(job_list); + job_list = NULL; + break; + } + xfree(query); + for (i2 = 0; i2 < PQntuples(result2); i2++) { + int local_start = + atoi(PQgetvalue(result, i2, 0)); + int local_end = + atoi(PQgetvalue(result, i2, 1)); + + if(!local_start) + continue; + + if(job->start > local_start) + local_start = job->start; + if(job->end < local_end) + local_end = job->end; + + if((local_end - local_start) < 1) + continue; + + job->elapsed -= + (local_end - local_start); + job->suspended += + (local_end - local_start); + } + PQclear(result2); + } + } else { + job->suspended = + atoi(PQgetvalue(result, i, JOB_REQ_SUSPENDED)); + if(!job->end) { + job->elapsed = now - job->start; + } else { + job->elapsed = job->end - job->start; + } + job->elapsed -= job->suspended; + } + + job->jobid = curr_id; job->jobname = xstrdup(PQgetvalue(result, i, JOB_REQ_NAME)); job->gid = atoi(PQgetvalue(result, i, JOB_REQ_GID)); job->exitcode = atoi(PQgetvalue(result, i, JOB_REQ_COMP_CODE)); diff --git a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h index 19575d88b4eb473faec6f15adb4c14a40ef12c4b..be5ff81c4f874b97101e29470a8d9b23103f2588 100644 --- a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h +++ b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.h @@ -58,6 +58,7 @@ extern char *assoc_table; extern char *job_table; extern char *step_table; +extern char *suspend_table; extern List pgsql_jobacct_process_get_jobs(PGconn *acct_pgsql_db, acct_job_cond_t *job_cond); diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index 9d602efaf27addcf29ad250f0fae5ee44e80b7d4..cda102757704108784f0a5b05987e8d5f516d5c0 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -183,14 +183,14 @@ extern int acct_storage_p_add_users(void *db_conn, uint32_t uid, List user_list) } extern int acct_storage_p_add_coord(void *db_conn, uint32_t uid, - List acct_list, acct_user_cond_t *user_q) + List acct_list, acct_user_cond_t *user_cond) { slurmdbd_msg_t req; dbd_acct_coord_msg_t get_msg; int rc, resp_code; get_msg.acct_list = acct_list; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_ADD_ACCOUNT_COORDS; req.data = &get_msg; @@ -220,7 +220,8 @@ extern int acct_storage_p_add_accts(void *db_conn, uint32_t uid, List acct_list) return rc; } -extern int acct_storage_p_add_clusters(void *db_conn, uint32_t uid, List cluster_list) +extern int acct_storage_p_add_clusters(void *db_conn, uint32_t uid, + List cluster_list) { slurmdbd_msg_t req; dbd_list_msg_t get_msg; @@ -258,9 +259,28 @@ extern int acct_storage_p_add_associations(void *db_conn, uint32_t uid, return rc; } +extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, + List qos_list) +{ + slurmdbd_msg_t req; + dbd_list_msg_t get_msg; + int rc, resp_code; + + get_msg.my_list = qos_list; + + req.msg_type = DBD_ADD_QOS; + req.data = &get_msg; + rc = slurm_send_slurmdbd_recv_rc_msg(&req, &resp_code); + + if(resp_code != SLURM_SUCCESS) + rc = resp_code; + + return rc; +} + extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q, - acct_user_rec_t *user) + acct_user_cond_t *user_cond, + 
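
Note: the block above clamps each job to the requested usage window (start pulled up to usage_start, end capped at usage_end) and then walks the suspend table, subtracting any overlap between each suspend interval and the clamped window from elapsed while adding it to suspended. One detail worth checking: inside the loop over result2 the values appear to be read with PQgetvalue(result, i2, ...) rather than from result2. The interval arithmetic itself, restated as a hypothetical stand-alone helper:

    /* Clip one suspend interval [s, e] to the job's clamped window
     * [start, end] and fold the overlap into the running totals. */
    static void fold_suspend(time_t s, time_t e, time_t start, time_t end,
                             time_t *elapsed, time_t *suspended)
    {
        if (s < start)
            s = start;          /* suspension began before the window */
        if (e > end)
            e = end;            /* suspension ran past the window */
        if ((e - s) < 1)
            return;             /* no overlap worth counting */
        *elapsed -= (e - s);
        *suspended += (e - s);
    }
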
acct_user_rec_t *user) { slurmdbd_msg_t req, resp; dbd_modify_msg_t get_msg; @@ -268,7 +288,7 @@ extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, List ret_list = NULL; int rc; - get_msg.cond = user_q; + get_msg.cond = user_cond; get_msg.rec = user; req.msg_type = DBD_MODIFY_USERS; @@ -299,7 +319,7 @@ extern List acct_storage_p_modify_users(void *db_conn, uint32_t uid, } extern List acct_storage_p_modify_accounts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q, + acct_account_cond_t *acct_cond, acct_account_rec_t *acct) { slurmdbd_msg_t req, resp; @@ -308,7 +328,7 @@ extern List acct_storage_p_modify_accounts(void *db_conn, uint32_t uid, int rc; List ret_list = NULL; - get_msg.cond = acct_q; + get_msg.cond = acct_cond; get_msg.rec = acct; req.msg_type = DBD_MODIFY_ACCOUNTS; @@ -339,8 +359,8 @@ extern List acct_storage_p_modify_accounts(void *db_conn, uint32_t uid, } extern List acct_storage_p_modify_clusters(void *db_conn, uint32_t uid, - acct_cluster_cond_t *cluster_q, - acct_cluster_rec_t *cluster) + acct_cluster_cond_t *cluster_cond, + acct_cluster_rec_t *cluster) { slurmdbd_msg_t req; dbd_modify_msg_t get_msg; @@ -349,7 +369,7 @@ extern List acct_storage_p_modify_clusters(void *db_conn, uint32_t uid, dbd_list_msg_t *got_msg; List ret_list = NULL; - get_msg.cond = cluster_q; + get_msg.cond = cluster_cond; get_msg.rec = cluster; req.msg_type = DBD_MODIFY_CLUSTERS; @@ -380,9 +400,10 @@ extern List acct_storage_p_modify_clusters(void *db_conn, uint32_t uid, return ret_list; } -extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q, - acct_association_rec_t *assoc) +extern List acct_storage_p_modify_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond, + acct_association_rec_t *assoc) { slurmdbd_msg_t req; dbd_modify_msg_t get_msg; @@ -392,7 +413,7 @@ extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = assoc_q; + get_msg.cond = assoc_cond; get_msg.rec = assoc; req.msg_type = DBD_MODIFY_ASSOCS; @@ -423,7 +444,7 @@ extern List acct_storage_p_modify_associations(void *db_conn, uint32_t uid, } extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -433,7 +454,7 @@ extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_REMOVE_USERS; req.data = &get_msg; @@ -464,7 +485,7 @@ extern List acct_storage_p_remove_users(void *db_conn, uint32_t uid, extern List acct_storage_p_remove_coord(void *db_conn, uint32_t uid, List acct_list, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { slurmdbd_msg_t req; dbd_acct_coord_msg_t get_msg; @@ -475,7 +496,7 @@ extern List acct_storage_p_remove_coord(void *db_conn, uint32_t uid, get_msg.acct_list = acct_list; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_REMOVE_ACCOUNT_COORDS; req.data = &get_msg; @@ -505,7 +526,7 @@ extern List acct_storage_p_remove_coord(void *db_conn, uint32_t uid, } extern List acct_storage_p_remove_accts(void *db_conn, uint32_t uid, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -515,7 +536,7 @@ extern List acct_storage_p_remove_accts(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = acct_q; + get_msg.cond = acct_cond; 
req.msg_type = DBD_REMOVE_ACCOUNTS; req.data = &get_msg; @@ -545,7 +566,7 @@ extern List acct_storage_p_remove_accts(void *db_conn, uint32_t uid, } extern List acct_storage_p_remove_clusters(void *db_conn, uint32_t uid, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -555,7 +576,7 @@ extern List acct_storage_p_remove_clusters(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = cluster_q; + get_msg.cond = cluster_cond; req.msg_type = DBD_REMOVE_CLUSTERS; req.data = &get_msg; @@ -584,8 +605,9 @@ extern List acct_storage_p_remove_clusters(void *db_conn, uint32_t uid, return ret_list; } -extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, - acct_association_cond_t *assoc_q) +extern List acct_storage_p_remove_associations( + void *db_conn, uint32_t uid, + acct_association_cond_t *assoc_cond) { slurmdbd_msg_t req; dbd_cond_msg_t get_msg; @@ -595,7 +617,7 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, List ret_list = NULL; - get_msg.cond = assoc_q; + get_msg.cond = assoc_cond; req.msg_type = DBD_REMOVE_ASSOCS; req.data = &get_msg; @@ -624,8 +646,49 @@ extern List acct_storage_p_remove_associations(void *db_conn, uint32_t uid, return ret_list; } +extern List acct_storage_p_remove_qos( + void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) +{ + slurmdbd_msg_t req; + dbd_cond_msg_t get_msg; + int rc; + slurmdbd_msg_t resp; + dbd_list_msg_t *got_msg; + List ret_list = NULL; + + + get_msg.cond = qos_cond; + + req.msg_type = DBD_REMOVE_QOS; + req.data = &get_msg; + rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + + if (rc != SLURM_SUCCESS) + error("slurmdbd: DBD_REMOVE_QOS failure: %m"); + else if (resp.msg_type == DBD_RC) { + dbd_rc_msg_t *msg = resp.data; + if(msg->return_code == SLURM_SUCCESS) { + info("%s", msg->comment); + ret_list = list_create(NULL); + } else + error("%s", msg->comment); + slurmdbd_free_rc_msg(msg); + } else if (resp.msg_type != DBD_GOT_LIST) { + error("slurmdbd: response type not DBD_GOT_LIST: %u", + resp.msg_type); + } else { + got_msg = (dbd_list_msg_t *) resp.data; + ret_list = got_msg->my_list; + got_msg->my_list = NULL; + slurmdbd_free_list_msg(got_msg); + } + + return ret_list; +} + extern List acct_storage_p_get_users(void *db_conn, - acct_user_cond_t *user_q) + acct_user_cond_t *user_cond) { slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; @@ -633,7 +696,7 @@ extern List acct_storage_p_get_users(void *db_conn, int rc; List ret_list = NULL; - get_msg.cond = user_q; + get_msg.cond = user_cond; req.msg_type = DBD_GET_USERS; req.data = &get_msg; @@ -655,7 +718,7 @@ extern List acct_storage_p_get_users(void *db_conn, } extern List acct_storage_p_get_accts(void *db_conn, - acct_account_cond_t *acct_q) + acct_account_cond_t *acct_cond) { slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; @@ -663,7 +726,7 @@ extern List acct_storage_p_get_accts(void *db_conn, int rc; List ret_list = NULL; - get_msg.cond = acct_q; + get_msg.cond = acct_cond; req.msg_type = DBD_GET_ACCOUNTS; req.data = &get_msg; @@ -686,7 +749,7 @@ extern List acct_storage_p_get_accts(void *db_conn, } extern List acct_storage_p_get_clusters(void *db_conn, - acct_account_cond_t *cluster_q) + acct_account_cond_t *cluster_cond) { slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; @@ -694,7 +757,7 @@ extern List acct_storage_p_get_clusters(void *db_conn, int rc; List ret_list = NULL; - get_msg.cond = cluster_q; + get_msg.cond = cluster_cond; req.msg_type = 
DBD_GET_CLUSTERS; req.data = &get_msg; @@ -717,16 +780,15 @@ extern List acct_storage_p_get_clusters(void *db_conn, } extern List acct_storage_p_get_associations(void *db_conn, - acct_association_cond_t *assoc_q) + acct_association_cond_t *assoc_cond) { - slurmdbd_msg_t req, resp; dbd_cond_msg_t get_msg; dbd_list_msg_t *got_msg; int rc; List ret_list = NULL; - get_msg.cond = assoc_q; + get_msg.cond = assoc_cond; req.msg_type = DBD_GET_ASSOCS; req.data = &get_msg; @@ -744,6 +806,72 @@ extern List acct_storage_p_get_associations(void *db_conn, slurmdbd_free_list_msg(got_msg); } + return ret_list; +} + +extern List acct_storage_p_get_qos(void *db_conn, + acct_qos_cond_t *qos_cond) +{ + slurmdbd_msg_t req, resp; + dbd_cond_msg_t get_msg; + dbd_list_msg_t *got_msg; + int rc; + List ret_list = NULL; + + get_msg.cond = qos_cond; + + req.msg_type = DBD_GET_QOS; + req.data = &get_msg; + rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + + if (rc != SLURM_SUCCESS) + error("slurmdbd: DBD_GET_QOS failure: %m"); + else if (resp.msg_type != DBD_GOT_QOS) { + error("slurmdbd: response type not DBD_GOT_QOS: %u", + resp.msg_type); + } else { + got_msg = (dbd_list_msg_t *) resp.data; + /* do this just for this type since it could be called + * multiple times, and if we send back and empty list + * instead of no list we will only call this once. + */ + if(!got_msg->my_list) + ret_list = list_create(NULL); + else + ret_list = got_msg->my_list; + got_msg->my_list = NULL; + slurmdbd_free_list_msg(got_msg); + } + + return ret_list; +} + +extern List acct_storage_p_get_txn(void *db_conn, + acct_txn_cond_t *txn_cond) +{ + slurmdbd_msg_t req, resp; + dbd_cond_msg_t get_msg; + dbd_list_msg_t *got_msg; + int rc; + List ret_list = NULL; + + get_msg.cond = txn_cond; + + req.msg_type = DBD_GET_TXN; + req.data = &get_msg; + rc = slurm_send_recv_slurmdbd_msg(&req, &resp); + + if (rc != SLURM_SUCCESS) + error("slurmdbd: DBD_GET_TXN failure: %m"); + else if (resp.msg_type != DBD_GOT_TXN) { + error("slurmdbd: response type not DBD_GOT_TXN: %u", + resp.msg_type); + } else { + got_msg = (dbd_list_msg_t *) resp.data; + ret_list = got_msg->my_list; + got_msg->my_list = NULL; + slurmdbd_free_list_msg(got_msg); + } return ret_list; } @@ -1231,7 +1359,12 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, get_msg.selected_steps = selected_steps; get_msg.selected_parts = selected_parts; - get_msg.cluster_name = params->opt_cluster; + if(params->opt_cluster_list && list_count(params->opt_cluster_list)) { + ListIterator itr = + list_iterator_create(params->opt_cluster_list); + get_msg.cluster_name = list_next(itr); + list_iterator_destroy(itr); + } get_msg.gid = params->opt_gid; if (params->opt_uid >=0 && (pw=getpwuid(params->opt_uid))) diff --git a/src/plugins/checkpoint/xlch/Makefile.am b/src/plugins/checkpoint/xlch/Makefile.am index 5bcc9bae2f2eeff3df9f57a68a11f0cbc61607b4..93d4975109f2daf6906ed4ec26941e9c2a4d8f09 100644 --- a/src/plugins/checkpoint/xlch/Makefile.am +++ b/src/plugins/checkpoint/xlch/Makefile.am @@ -10,10 +10,6 @@ pkglib_LTLIBRARIES = checkpoint_xlch.la checkpoint_xlch_la_SOURCES = checkpoint_xlch.c config.c checkpoint_xlch_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la - -checkpoint_xlch_la_LIBADD = $(convenience_libs) - config.c: Makefile @( echo "char *scch_path = \"$(prefix)/sbin/scch\";"\ ) > config.c diff --git a/src/plugins/checkpoint/xlch/Makefile.in b/src/plugins/checkpoint/xlch/Makefile.in index 
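
Note: acct_storage_p_get_qos above deliberately returns an empty List rather than NULL when the DBD reports no QOS records, so callers can cache the "known empty" answer instead of re-issuing the RPC. Also worth noting: the acct_storage_p_get_associations hunk drops the local "slurmdbd_msg_t req, resp;" declaration while the remaining body still appears to use req and resp; if so, that declaration presumably needs to stay. A short caller sketch of the empty-versus-NULL convention (call site hypothetical):

    /* NULL  -> the lookup failed or has not run; try again later.
     * empty -> the DBD answered and no QOS records exist; safe to cache. */
    List qos_list = acct_storage_p_get_qos(db_conn, NULL);
    if (qos_list == NULL) {
        /* RPC failure; retry on the next pass */
    } else if (list_count(qos_list) == 0) {
        /* authoritative "no QOS defined" answer */
    }
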
bece591cd6f51eb43264672faa87aa309fe163e0..a873989cca0efb5fc4fa59563da05c3561eb71ec 100644 --- a/src/plugins/checkpoint/xlch/Makefile.in +++ b/src/plugins/checkpoint/xlch/Makefile.in @@ -75,7 +75,7 @@ am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; am__installdirs = "$(DESTDIR)$(pkglibdir)" pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) -checkpoint_xlch_la_DEPENDENCIES = $(convenience_libs) +checkpoint_xlch_la_LIBADD = am_checkpoint_xlch_la_OBJECTS = checkpoint_xlch.lo config.lo checkpoint_xlch_la_OBJECTS = $(am_checkpoint_xlch_la_OBJECTS) checkpoint_xlch_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ @@ -271,8 +271,6 @@ INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = checkpoint_xlch.la checkpoint_xlch_la_SOURCES = checkpoint_xlch.c config.c checkpoint_xlch_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la -checkpoint_xlch_la_LIBADD = $(convenience_libs) all: all-am .SUFFIXES: diff --git a/src/plugins/jobcomp/Makefile.am b/src/plugins/jobcomp/Makefile.am index 0f2d169d4f9ad29d79ab9a1d54360f3540a7c26a..14b8f4a099cc15f538886426fd662e4eea5c2ff4 100644 --- a/src/plugins/jobcomp/Makefile.am +++ b/src/plugins/jobcomp/Makefile.am @@ -1,3 +1,3 @@ # Makefile for jobcomp plugins -SUBDIRS = filetxt none script mysql pgsql slurmdbd +SUBDIRS = filetxt none script mysql pgsql diff --git a/src/plugins/jobcomp/Makefile.in b/src/plugins/jobcomp/Makefile.in index c83e5dd7c7000e3db575ae62cc9470dea86f96a5..70c8f3dbea4d2902dcf408557abe339997237a3e 100644 --- a/src/plugins/jobcomp/Makefile.in +++ b/src/plugins/jobcomp/Makefile.in @@ -247,7 +247,7 @@ target_os = @target_os@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ -SUBDIRS = filetxt none script mysql pgsql slurmdbd +SUBDIRS = filetxt none script mysql pgsql all: all-recursive .SUFFIXES: diff --git a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c index dd882778d57983d8707fe208c576795cf0d47a7b..cf22d42a7a4d8a05d6437e2a36d4c2d317917444 100644 --- a/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c +++ b/src/plugins/jobcomp/filetxt/filetxt_jobcomp_process.c @@ -188,7 +188,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, { char line[BUFFER_SIZE]; char *fptr = NULL; - char *jobid = NULL; + int jobid = 0; char *partition = NULL; FILE *fd = NULL; int lc = 0; @@ -207,7 +207,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, terminated strings */ if(job_info_list) list_destroy(job_info_list); - jobid = NULL; + jobid = 0; partition = NULL; job_info_list = list_create(_destroy_filetxt_jobcomp_info); while(fptr) { @@ -220,7 +220,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, jobcomp_info->val = fptr; fptr = strstr(fptr, " "); if(!strcasecmp("JobId", jobcomp_info->name)) - jobid = jobcomp_info->val; + jobid = atoi(jobcomp_info->val); else if(!strcasecmp("Partition", jobcomp_info->name)) partition = jobcomp_info->val; @@ -245,7 +245,7 @@ extern void filetxt_jobcomp_process_get_jobs(List job_list, continue; itr = list_iterator_create(selected_steps); while((selected_step = list_next(itr))) { - if (strcmp(selected_step->job, jobid)) + if (selected_step->jobid == jobid) continue; /* job matches */ list_iterator_destroy(itr); diff --git a/src/plugins/jobcomp/script/jobcomp_script.c b/src/plugins/jobcomp/script/jobcomp_script.c index 
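
Note: in filetxt_jobcomp_process.c above, jobid becomes an int parsed with atoi() and the selected-steps check is rewritten as an equality test. The original strcmp() test skipped entries that did not match, so preserving that behaviour presumably calls for an inequality comparison, as sketched here (flagged as an assumption about intent):

    /* Assumed intent: keep skipping entries whose job id does not match,
     * mirroring the old strcmp(selected_step->job, jobid) != 0 test. */
    if (selected_step->jobid != jobid)
        continue;
    /* job matches */
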
503da6ce607846c50a4592dbbf3b287c8475db29..8f6348a673c67cd7630deca7b46bec71c0596f0b 100644 --- a/src/plugins/jobcomp/script/jobcomp_script.c +++ b/src/plugins/jobcomp/script/jobcomp_script.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * jobcomp_script.c - Script running slurm job completion logging plugin. - * $Id: jobcomp_script.c 14242 2008-06-11 23:29:49Z jette $ + * $Id: jobcomp_script.c 14500 2008-07-11 23:00:14Z jette $ ***************************************************************************** * Produced at Center for High Performance Computing, North Dakota State * University @@ -251,16 +251,15 @@ static char ** _extend_env (char ***envp) static int _env_append (char ***envp, const char *name, const char *val) { - char buf[4096]; - char *entry; + char *entry = NULL; char **ep; if (val == NULL) val = ""; - snprintf (buf, sizeof (buf) - 1, "%s=%s", name, val); + xstrfmtcat (entry, "%s=%s", name, val); - if (!(entry = xstrdup (buf))) + if (entry == NULL) return (-1); ep = _extend_env (envp); diff --git a/src/plugins/jobcomp/slurmdbd/Makefile.am b/src/plugins/jobcomp/slurmdbd/Makefile.am deleted file mode 100644 index 5ce733dbfc7ad5cd5ff6493e0efc762ccfb6a874..0000000000000000000000000000000000000000 --- a/src/plugins/jobcomp/slurmdbd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Makefile for jobcomp/slurmdbd plugin - -AUTOMAKE_OPTIONS = foreign - -PLUGIN_FLAGS = -module -avoid-version --export-dynamic - -INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common - -pkglib_LTLIBRARIES = jobcomp_slurmdbd.la - -# Null job completion logging plugin. -jobcomp_slurmdbd_la_SOURCES = jobcomp_slurmdbd.c -jobcomp_slurmdbd_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/jobcomp/slurmdbd/Makefile.in b/src/plugins/jobcomp/slurmdbd/Makefile.in deleted file mode 100644 index 42f439efd5d53eb8ef59e4651ae6aa84c3e00f23..0000000000000000000000000000000000000000 --- a/src/plugins/jobcomp/slurmdbd/Makefile.in +++ /dev/null @@ -1,555 +0,0 @@ -# Makefile.in generated by automake 1.10.1 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. 
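
Note: the _env_append change above replaces the fixed 4096-byte buffer plus snprintf/xstrdup pair with a single xstrfmtcat call, so a NAME=VALUE string of any length is built in one allocation. The resulting helper, sketched in consolidated form (the storage of the entry via _extend_env is assumed to be unchanged from the existing code):

    static int _env_append(char ***envp, const char *name, const char *val)
    {
        char *entry = NULL;

        if (val == NULL)
            val = "";

        /* xstrfmtcat allocates and grows entry as needed; there is no
         * fixed-size buffer left to truncate long values. */
        xstrfmtcat(entry, "%s=%s", name, val);
        if (entry == NULL)
            return (-1);

        /* ...append entry to the environment via _extend_env() as before... */
        return (0);
    }
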
- -@SET_MAKE@ - -# Makefile for jobcomp/slurmdbd plugin - -VPATH = @srcdir@ -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -target_triplet = @target@ -subdir = src/plugins/jobcomp/slurmdbd -DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/auxdir/acx_pthread.m4 \ - $(top_srcdir)/auxdir/slurm.m4 \ - $(top_srcdir)/auxdir/x_ac__system_configuration.m4 \ - $(top_srcdir)/auxdir/x_ac_affinity.m4 \ - $(top_srcdir)/auxdir/x_ac_aix.m4 \ - $(top_srcdir)/auxdir/x_ac_bluegene.m4 \ - $(top_srcdir)/auxdir/x_ac_cflags.m4 \ - $(top_srcdir)/auxdir/x_ac_databases.m4 \ - $(top_srcdir)/auxdir/x_ac_debug.m4 \ - $(top_srcdir)/auxdir/x_ac_elan.m4 \ - $(top_srcdir)/auxdir/x_ac_federation.m4 \ - $(top_srcdir)/auxdir/x_ac_gpl_licensed.m4 \ - $(top_srcdir)/auxdir/x_ac_gtk.m4 \ - $(top_srcdir)/auxdir/x_ac_munge.m4 \ - $(top_srcdir)/auxdir/x_ac_ncurses.m4 \ - $(top_srcdir)/auxdir/x_ac_pam.m4 \ - $(top_srcdir)/auxdir/x_ac_ptrace.m4 \ - $(top_srcdir)/auxdir/x_ac_readline.m4 \ - $(top_srcdir)/auxdir/x_ac_setpgrp.m4 \ - $(top_srcdir)/auxdir/x_ac_setproctitle.m4 \ - $(top_srcdir)/auxdir/x_ac_sgi_job.m4 \ - $(top_srcdir)/auxdir/x_ac_slurm_ssl.m4 \ - $(top_srcdir)/auxdir/x_ac_xcpu.m4 $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = $(top_builddir)/config.h $(top_builddir)/slurm/slurm.h -CONFIG_CLEAN_FILES = -am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; -am__vpath_adj = case $$p in \ - $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ - *) f=$$p;; \ - esac; -am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; -am__installdirs = "$(DESTDIR)$(pkglibdir)" -pkglibLTLIBRARIES_INSTALL = $(INSTALL) -LTLIBRARIES = $(pkglib_LTLIBRARIES) -jobcomp_slurmdbd_la_LIBADD = -am_jobcomp_slurmdbd_la_OBJECTS = jobcomp_slurmdbd.lo -jobcomp_slurmdbd_la_OBJECTS = $(am_jobcomp_slurmdbd_la_OBJECTS) -jobcomp_slurmdbd_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ - $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ - $(jobcomp_slurmdbd_la_LDFLAGS) $(LDFLAGS) -o $@ -DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/slurm -depcomp = $(SHELL) $(top_srcdir)/auxdir/depcomp -am__depfiles_maybe = depfiles -COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ - $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ - $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ - --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ - $(LDFLAGS) -o $@ -SOURCES = $(jobcomp_slurmdbd_la_SOURCES) -DIST_SOURCES = $(jobcomp_slurmdbd_la_SOURCES) -ETAGS = etags -CTAGS = ctags -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -AMTAR = @AMTAR@ -AR = @AR@ -AUTHD_CFLAGS = @AUTHD_CFLAGS@ 
-AUTHD_LIBS = @AUTHD_LIBS@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -BG_INCLUDES = @BG_INCLUDES@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CMD_LDFLAGS = @CMD_LDFLAGS@ -CPP = @CPP@ -CPPFLAGS = @CPPFLAGS@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -DSYMUTIL = @DSYMUTIL@ -ECHO = @ECHO@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -ELAN_LIBS = @ELAN_LIBS@ -EXEEXT = @EXEEXT@ -F77 = @F77@ -FEDERATION_LDFLAGS = @FEDERATION_LDFLAGS@ -FFLAGS = @FFLAGS@ -GREP = @GREP@ -GTK2_CFLAGS = @GTK2_CFLAGS@ -GTK2_LIBS = @GTK2_LIBS@ -HAVEMYSQLCONFIG = @HAVEMYSQLCONFIG@ -HAVEPGCONFIG = @HAVEPGCONFIG@ -HAVEPKGCONFIG = @HAVEPKGCONFIG@ -HAVE_AIX = @HAVE_AIX@ -HAVE_ELAN = @HAVE_ELAN@ -HAVE_FEDERATION = @HAVE_FEDERATION@ -HAVE_OPENSSL = @HAVE_OPENSSL@ -HAVE_SOME_CURSES = @HAVE_SOME_CURSES@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -LDFLAGS = @LDFLAGS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LIBTOOL = @LIBTOOL@ -LIB_LDFLAGS = @LIB_LDFLAGS@ -LN_S = @LN_S@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MKDIR_P = @MKDIR_P@ -MUNGE_CPPFLAGS = @MUNGE_CPPFLAGS@ -MUNGE_LDFLAGS = @MUNGE_LDFLAGS@ -MUNGE_LIBS = @MUNGE_LIBS@ -MYSQL_CFLAGS = @MYSQL_CFLAGS@ -MYSQL_LIBS = @MYSQL_LIBS@ -NCURSES = @NCURSES@ -NMEDIT = @NMEDIT@ -NUMA_LIBS = @NUMA_LIBS@ -OBJEXT = @OBJEXT@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PAM_LIBS = @PAM_LIBS@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PGSQL_CFLAGS = @PGSQL_CFLAGS@ -PGSQL_LIBS = @PGSQL_LIBS@ -PLPA_LIBS = @PLPA_LIBS@ -PROCTRACKDIR = @PROCTRACKDIR@ -PROJECT = @PROJECT@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -RANLIB = @RANLIB@ -READLINE_LIBS = @READLINE_LIBS@ -RELEASE = @RELEASE@ -SED = @SED@ -SEMAPHORE_LIBS = @SEMAPHORE_LIBS@ -SEMAPHORE_SOURCES = @SEMAPHORE_SOURCES@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -SLURMCTLD_PORT = @SLURMCTLD_PORT@ -SLURMDBD_PORT = @SLURMDBD_PORT@ -SLURMD_PORT = @SLURMD_PORT@ -SLURM_API_AGE = @SLURM_API_AGE@ -SLURM_API_CURRENT = @SLURM_API_CURRENT@ -SLURM_API_MAJOR = @SLURM_API_MAJOR@ -SLURM_API_REVISION = @SLURM_API_REVISION@ -SLURM_API_VERSION = @SLURM_API_VERSION@ -SLURM_MAJOR = @SLURM_MAJOR@ -SLURM_MICRO = @SLURM_MICRO@ -SLURM_MINOR = @SLURM_MINOR@ -SLURM_VERSION = @SLURM_VERSION@ -SO_LDFLAGS = @SO_LDFLAGS@ -SSL_CPPFLAGS = @SSL_CPPFLAGS@ -SSL_LDFLAGS = @SSL_LDFLAGS@ -SSL_LIBS = @SSL_LIBS@ -STRIP = @STRIP@ -UTIL_LIBS = @UTIL_LIBS@ -VERSION = @VERSION@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -ac_ct_F77 = @ac_ct_F77@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_vendor = @build_vendor@ -builddir = @builddir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = 
@host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target = @target@ -target_alias = @target_alias@ -target_cpu = @target_cpu@ -target_os = @target_os@ -target_vendor = @target_vendor@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ -AUTOMAKE_OPTIONS = foreign -PLUGIN_FLAGS = -module -avoid-version --export-dynamic -INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common -pkglib_LTLIBRARIES = jobcomp_slurmdbd.la - -# Null job completion logging plugin. -jobcomp_slurmdbd_la_SOURCES = jobcomp_slurmdbd.c -jobcomp_slurmdbd_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) -all: all-am - -.SUFFIXES: -.SUFFIXES: .c .lo .o .obj -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ - && exit 0; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/plugins/jobcomp/slurmdbd/Makefile'; \ - cd $(top_srcdir) && \ - $(AUTOMAKE) --foreign src/plugins/jobcomp/slurmdbd/Makefile -.PRECIOUS: Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -install-pkglibLTLIBRARIES: $(pkglib_LTLIBRARIES) - @$(NORMAL_INSTALL) - test -z "$(pkglibdir)" || $(MKDIR_P) "$(DESTDIR)$(pkglibdir)" - @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ - if test -f $$p; then \ - f=$(am__strip_dir) \ - echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) '$$p' '$(DESTDIR)$(pkglibdir)/$$f'"; \ - $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(pkglibLTLIBRARIES_INSTALL) $(INSTALL_STRIP_FLAG) "$$p" "$(DESTDIR)$(pkglibdir)/$$f"; \ - else :; fi; \ - done - -uninstall-pkglibLTLIBRARIES: - @$(NORMAL_UNINSTALL) - @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ - p=$(am__strip_dir) \ - echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(pkglibdir)/$$p'"; \ - $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(pkglibdir)/$$p"; \ - done - -clean-pkglibLTLIBRARIES: - -test -z "$(pkglib_LTLIBRARIES)" || rm -f $(pkglib_LTLIBRARIES) - @list='$(pkglib_LTLIBRARIES)'; for p in $$list; do \ - dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ - test "$$dir" != "$$p" || dir=.; \ - echo "rm -f \"$${dir}/so_locations\""; \ - rm -f 
"$${dir}/so_locations"; \ - done -jobcomp_slurmdbd.la: $(jobcomp_slurmdbd_la_OBJECTS) $(jobcomp_slurmdbd_la_DEPENDENCIES) - $(jobcomp_slurmdbd_la_LINK) -rpath $(pkglibdir) $(jobcomp_slurmdbd_la_OBJECTS) $(jobcomp_slurmdbd_la_LIBADD) $(LIBS) - -mostlyclean-compile: - -rm -f *.$(OBJEXT) - -distclean-compile: - -rm -f *.tab.c - -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jobcomp_slurmdbd.Plo@am__quote@ - -.c.o: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c $< - -.c.obj: -@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` - -.c.lo: -@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< -@am__fastdepCC_TRUE@ mv -f $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< - -mostlyclean-libtool: - -rm -f *.lo - -clean-libtool: - -rm -rf .libs _libs - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - mkid -fID $$unique -tags: TAGS - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ - test -n "$$unique" || unique=$$empty_fix; \ - $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ - $$tags $$unique; \ - fi -ctags: CTAGS -CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ - END { if (nonempty) { for (i in files) print i; }; }'`; \ - test -z "$(CTAGS_ARGS)$$tags$$unique" \ - || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ - $$tags $$unique - -GTAGS: - here=`$(am__cd) $(top_builddir) && pwd` \ - && cd $(top_srcdir) \ - && gtags -i $(GTAGS_ARGS) $$here - -distclean-tags: - -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e 
"s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ - fi; \ - cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(LTLIBRARIES) -installdirs: - for dir in "$(DESTDIR)$(pkglibdir)"; do \ - test -z "$$dir" || $(MKDIR_P) "$$dir"; \ - done -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - `test -z '$(STRIP)' || \ - echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." -clean: clean-am - -clean-am: clean-generic clean-libtool clean-pkglibLTLIBRARIES \ - mostlyclean-am - -distclean: distclean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -distclean-am: clean-am distclean-compile distclean-generic \ - distclean-tags - -dvi: dvi-am - -dvi-am: - -html: html-am - -info: info-am - -info-am: - -install-data-am: - -install-dvi: install-dvi-am - -install-exec-am: install-pkglibLTLIBRARIES - -install-html: install-html-am - -install-info: install-info-am - -install-man: - -install-pdf: install-pdf-am - -install-ps: install-ps-am - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -rf ./$(DEPDIR) - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-compile mostlyclean-generic \ - mostlyclean-libtool - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: uninstall-pkglibLTLIBRARIES - -.MAKE: install-am install-strip - -.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ - clean-libtool clean-pkglibLTLIBRARIES ctags distclean \ - distclean-compile distclean-generic distclean-libtool \ - distclean-tags distdir dvi dvi-am html html-am info info-am \ - install install-am install-data install-data-am install-dvi \ - install-dvi-am install-exec install-exec-am install-html \ - install-html-am install-info install-info-am install-man \ - install-pdf install-pdf-am install-pkglibLTLIBRARIES \ - install-ps install-ps-am install-strip installcheck \ - installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-compile \ - mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c b/src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c deleted file mode 100644 index aceee6cc8a5f8045ba114714fb496cfa867d5dad..0000000000000000000000000000000000000000 --- a/src/plugins/jobcomp/slurmdbd/jobcomp_slurmdbd.c +++ /dev/null @@ -1,139 +0,0 @@ -/*****************************************************************************\ - * jobcomp_slurmdbd.c - SlurmDBD slurm job completion plugin. - ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. - * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). - * Written by Danny Aubke <da@llnl.gov>. - * LLNL-CODE-402394. - * - * This file is part of SLURM, a resource management program. - * For details, see <http://www.llnl.gov/linux/slurm/>. - * - * SLURM is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do - * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in - * the program, then also delete it here. - * - * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License along - * with SLURM; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -\*****************************************************************************/ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#if HAVE_STDINT_H -# include <stdint.h> -#endif -#if HAVE_INTTYPES_H -# include <inttypes.h> -#endif - -#include <stdio.h> -#include <slurm/slurm_errno.h> - -#include "src/common/slurm_jobcomp.h" -#include "src/slurmctld/slurmctld.h" - -/* - * These variables are required by the generic plugin interface. If they - * are not found in the plugin, the plugin loader will ignore it. - * - * plugin_name - a string giving a human-readable description of the - * plugin. There is no maximum length, but the symbol must refer to - * a valid string. - * - * plugin_type - a string suggesting the type of the plugin or its - * applicability to a particular form of data or method of data handling. - * If the low-level plugin API is used, the contents of this string are - * unimportant and may be anything. 
SLURM uses the higher-level plugin - * interface which requires this string to be of the form - * - * <application>/<method> - * - * where <application> is a description of the intended application of - * the plugin (e.g., "jobcomp" for SLURM job completion logging) and <method> - * is a description of how this plugin satisfies that application. SLURM will - * only load job completion logging plugins if the plugin_type string has a - * prefix of "jobcomp/". - * - * plugin_version - an unsigned 32-bit integer giving the version number - * of the plugin. If major and minor revisions are desired, the major - * version number may be multiplied by a suitable magnitude constant such - * as 100 or 1000. Various SLURM versions will likely require a certain - * minimum versions for their plugins as the job completion logging API - * matures. - */ -const char plugin_name[] = "Job completion logging SLURMDBD plugin"; -const char plugin_type[] = "jobcomp/slurmdbd"; -const uint32_t plugin_version = 100; - -/* - * init() is called when the plugin is loaded, before any other functions - * are called. Put global initialization here. - */ -int init ( void ) -{ - return SLURM_SUCCESS; -} - -/* - * The remainder of this file implements the standard SLURM job completion - * logging API. - */ - -int slurm_jobcomp_set_location ( char * location ) -{ - return SLURM_SUCCESS; -} - -int slurm_jobcomp_log_record ( struct job_record *job_ptr ) -{ - return SLURM_SUCCESS; -} - -int slurm_jobcomp_get_errno( void ) -{ - return SLURM_SUCCESS; -} - -char *slurm_jobcomp_strerror( int errnum ) -{ - return NULL; -} - -List slurm_jobcomp_get_jobs(List selected_steps, List selected_parts, - void *params) -{ - return NULL; -} - -void slurm_jobcomp_archive(List selected_parts, - void *params) -{ - return; -} - -int fini ( void ) -{ - return SLURM_SUCCESS; -} - - diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index f817698b7e73b3f74833c13067e1b20c7a571d1b..5e9c50a92d3e0c21c8b45464a750c17385df5a62 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -408,6 +408,20 @@ static void mvapich_poll_destroy (struct mvapich_poll *mp) xfree (mp); } + +/* + * Call poll(2) on mvapich_poll object, handling EAGAIN and EINTR errors. + */ +static int mvapich_poll_internal (struct mvapich_poll *mp) +{ + int n; + while ((n = poll (mp->fds, mp->nfds, startup_timeout (mp->st))) < 0) { + if (errno != EINTR && errno != EAGAIN) + return (-1); + } + return (n); +} + /* * Poll for next available mvapich_info object with read/write activity * @@ -457,7 +471,7 @@ again: mvapich_debug3 ("mvapich_poll_next (nfds=%d, timeout=%d)\n", mp->nfds, startup_timeout (st)); - if ((rc = poll (mp->fds, mp->nfds, startup_timeout (st))) < 0) + if ((rc = mvapich_poll_internal (mp)) < 0) mvapich_terminate_job (st, "mvapich_poll_next: %m"); else if (rc == 0) { /* @@ -471,9 +485,9 @@ again: /* * Loop through poll fds and return first mvapich_info object - * we find that has the requested read/write activity. + * we find that has the requested read/write activity. * When found, we update the loop counter, and return - * the corresponding mvapich_info object. + * the corresponding mvapich_info object. 
* */ for (i = mp->counter; i < mp->nfds; i++) { @@ -786,7 +800,7 @@ static int mvapich_recv (mvapich_state_t *st, void* buf, int size, int rank) /* Scatter data in buf to ranks using chunks of size bytes */ static int mvapich_scatterbcast (mvapich_state_t *st, void* buf, int size) { - int rc; + int rc = 0; int n = 0; struct mvapich_poll *mp; struct mvapich_info *mvi; @@ -1284,8 +1298,9 @@ static int mvapich_abort_accept (mvapich_state_t *st) mvapich_abort_timeout ()); while ((rc = poll (pfds, 1, mvapich_abort_timeout ())) < 0) { - if (errno != EINTR) - return (-1); + if (errno == EINTR || errno == EAGAIN) + continue; + return (-1); } /* @@ -1689,11 +1704,13 @@ mvapich_initialize_connections (mvapich_state_t *st, mvapich_debug3 ("do_poll (nfds=%d)\n", nfds); - if ((rc = poll (fds, nfds, startup_timeout (st))) < 0) { + while ((rc = poll (fds, nfds, startup_timeout (st))) < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; error ("mvapich: poll: %m"); break; } - else if (rc == 0) { + if (rc == 0) { report_absent_tasks (st, 1); mvapich_terminate_job (st, NULL); } diff --git a/src/plugins/select/bluegene/plugin/Makefile.am b/src/plugins/select/bluegene/plugin/Makefile.am index fde0b65dc943164a9f92b0e59d239eee2fed685d..da526d6e79e8b00f8c2eff05e5faabd0502abbb0 100644 --- a/src/plugins/select/bluegene/plugin/Makefile.am +++ b/src/plugins/select/bluegene/plugin/Makefile.am @@ -47,3 +47,7 @@ slurm_epilog_SOURCES = slurm_epilog.c sfree_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS) slurm_prolog_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) slurm_epilog_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) + +force: +$(select_bluegene_la_LIBADD) $(sfree_LDADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/plugins/select/bluegene/plugin/Makefile.in b/src/plugins/select/bluegene/plugin/Makefile.in index e21b8b361a2eff20a88ace3cbdb67413756b81eb..3e66184e535cf9d9e17dce6db057aed13ea110ac 100644 --- a/src/plugins/select/bluegene/plugin/Makefile.in +++ b/src/plugins/select/bluegene/plugin/Makefile.in @@ -689,6 +689,10 @@ uninstall-am: uninstall-pkglibLTLIBRARIES uninstall-sbinPROGRAMS tags uninstall uninstall-am uninstall-pkglibLTLIBRARIES \ uninstall-sbinPROGRAMS + +force: +$(select_bluegene_la_LIBADD) $(sfree_LDADD) : force + @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. 
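The mvapich changes above wrap every bare poll(2) call in a retry loop so that an interrupted system call (EINTR) or a transient EAGAIN no longer aborts MPI job startup. A minimal standalone sketch of the same pattern, using only POSIX poll(2) (the mvapich_poll_internal/startup_timeout names in the patch are SLURM-internal):

    #include <errno.h>
    #include <poll.h>

    /* Retry poll(2) until it succeeds, times out, or fails for real.
     * EINTR (a signal arrived) and EAGAIN (transient shortage) are retried;
     * any other errno is reported to the caller as -1. */
    static int poll_retry(struct pollfd *fds, nfds_t nfds, int timeout_ms)
    {
        int n;
        while ((n = poll(fds, nfds, timeout_ms)) < 0) {
            if (errno != EINTR && errno != EAGAIN)
                return -1;          /* genuine error: errno is preserved */
        }
        return n;                   /* >= 0: ready descriptors, 0 on timeout */
    }

One detail the patch handles that this sketch glosses over: mvapich_poll_internal() recomputes the remaining startup timeout on every retry, so repeated interruptions cannot stretch the overall wait the way a fixed timeout_ms would.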
.NOEXPORT: diff --git a/src/plugins/select/cons_res/dist_tasks.c b/src/plugins/select/cons_res/dist_tasks.c index 7a4fafe861d3c29b004cb16ee78a43e80884ad56..deafd2fd3e839b61428ce55628219a5546084b77 100644 --- a/src/plugins/select/cons_res/dist_tasks.c +++ b/src/plugins/select/cons_res/dist_tasks.c @@ -63,16 +63,22 @@ int compute_c_b_task_dist(struct select_cr_job *job) { int i, j, rc = SLURM_SUCCESS; + bool over_commit = false; bool over_subscribe = false; uint32_t taskid = 0, last_taskid, maxtasks = job->nprocs; + if (job->job_ptr->details && job->job_ptr->details->overcommit) + over_commit = true; + for (j = 0; (taskid < maxtasks); j++) { /* cycle counter */ bool space_remaining = false; last_taskid = taskid; for (i = 0; ((i < job->nhosts) && (taskid < maxtasks)); i++) { if ((j < job->cpus[i]) || over_subscribe) { taskid++; - job->alloc_cpus[i]++; + if ((job->alloc_cpus[i] == 0) || + (!over_commit)) + job->alloc_cpus[i]++; if ((j + 1) < job->cpus[i]) space_remaining = true; } @@ -508,6 +514,7 @@ extern int cr_plane_dist(struct select_cr_job *job, uint32_t taskcount = 0, last_taskcount; int job_index = -1; bool count_done = false; + bool over_commit = false; debug3("cons_res _cr_plane_dist plane_size %u ", plane_size); debug3("cons_res _cr_plane_dist maxtasks %u num_hosts %u", @@ -517,7 +524,10 @@ extern int cr_plane_dist(struct select_cr_job *job, error("Error in _cr_plane_dist"); return SLURM_ERROR; } - + + if (job->job_ptr->details && job->job_ptr->details->overcommit) + over_commit = true; + taskcount = 0; for (j=0; ((taskcount<maxtasks) && (!count_done)); j++) { last_taskcount = taskcount; @@ -530,7 +540,9 @@ extern int cr_plane_dist(struct select_cr_job *job, break; } taskcount++; - job->alloc_cpus[i]++; + if ((job->alloc_cpus[i] == 0) || + (!over_commit)) + job->alloc_cpus[i]++; } } if (last_taskcount == taskcount) { diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index e878ffe6f33f70dbc45c731498a2de149db9ba1f..43c5a66bdb4c95b2c3729e05e8baa0c00dcce631 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -2,7 +2,7 @@ * select_cons_res.c - node selection plugin supporting consumable * resources policies. 
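The dist_tasks.c hunks just above consult job_ptr->details->overcommit and, when it is set, charge a node at most one allocated CPU no matter how many tasks land on it, while the existing over_subscribe fallback still lets task layout continue once every CPU is taken. A rough sketch of that cyclic distribution rule, with hypothetical stand-in arrays (ncpus, alloc_cpus) in place of the select/cons_res job structures:

    #include <stdbool.h>
    #include <stdint.h>

    /* Lay out `ntasks` over `nhosts` nodes, pass by pass.  Without overcommit
     * every placed task consumes one allocated CPU on its node; with
     * overcommit a node is charged a single CPU regardless of task count. */
    static void distribute_tasks(uint32_t ntasks, int nhosts,
                                 const uint16_t *ncpus,   /* CPUs per node  */
                                 uint16_t *alloc_cpus,    /* out: CPUs used */
                                 bool overcommit)
    {
        uint32_t placed = 0;
        bool over_subscribe = false;      /* more tasks than CPUs in total */

        if (nhosts <= 0)
            return;
        for (int pass = 0; placed < ntasks; pass++) {
            uint32_t placed_before = placed;
            for (int n = 0; n < nhosts && placed < ntasks; n++) {
                if (pass >= ncpus[n] && !over_subscribe)
                    continue;             /* this node is full for now */
                placed++;
                if (alloc_cpus[n] == 0 || !overcommit)
                    alloc_cpus[n]++;      /* --overcommit: at most 1 CPU/node */
            }
            if (placed == placed_before)
                over_subscribe = true;    /* no CPU left anywhere: oversubscribe */
        }
    }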
* - * $Id: select_cons_res.c 14297 2008-06-20 15:41:06Z jette $ + * $Id: select_cons_res.c 14469 2008-07-09 18:15:23Z jette $ *****************************************************************************\ * * The following example below illustrates how four jobs are allocated @@ -599,6 +599,17 @@ static uint16_t _get_task_count(struct node_cr_record *select_node_ptr, &threads, alloc_cores, cr_type, job_ptr->job_id, this_node->node_ptr->name); + + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + uint32_t free_mem, mem_per_cpu; + int max_cpus; + mem_per_cpu = job_ptr->details->job_min_memory & (~MEM_PER_CPU); + free_mem = this_node->real_memory - this_node->alloc_memory; + max_cpus = free_mem / mem_per_cpu; + /* info("cpus avail:%d mem for %d", numtasks, max_cpus); */ + numtasks = MIN(numtasks, max_cpus); + } + #if (CR_DEBUG) info("cons_res: _get_task_count computed a_tasks %d s %d c %d " "t %d on %s for job %d", @@ -1992,8 +2003,9 @@ static int _verify_node_state(struct node_cr_record *select_node_ptr, enum node_cr_state job_node_req) { int i; - uint32_t free_mem; + uint32_t free_mem, min_mem; + min_mem = job_ptr->details->job_min_memory & (~MEM_PER_CPU); for (i = 0; i < select_node_cnt; i++) { if (!bit_test(bitmap, i)) continue; @@ -2003,7 +2015,7 @@ static int _verify_node_state(struct node_cr_record *select_node_ptr, (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))) { free_mem = select_node_ptr[i].real_memory; free_mem -= select_node_ptr[i].alloc_memory; - if (free_mem < job_ptr->details->job_min_memory) + if (free_mem < min_mem) goto clear_bit; } @@ -2589,9 +2601,6 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, job->cpus[j] = 0; } job->alloc_cpus[j] = 0; - if ((cr_type == CR_CORE_MEMORY) || (cr_type == CR_CPU_MEMORY) || - (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY)) - job->alloc_memory[j] = job_ptr->details->job_min_memory; if ((cr_type == CR_CORE) || (cr_type == CR_CORE_MEMORY)|| (cr_type == CR_SOCKET) || (cr_type == CR_SOCKET_MEMORY)) { _chk_resize_job(job, j, job->num_sockets[j]); @@ -2652,6 +2661,26 @@ static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, return error_code; } + if (job_ptr->details->job_min_memory && + ((cr_type == CR_CORE_MEMORY) || (cr_type == CR_CPU_MEMORY) || + (cr_type == CR_MEMORY) || (cr_type == CR_SOCKET_MEMORY))) { + j = 0; + for (i = 0; i < node_record_count; i++) { + if (bit_test(bitmap, i) == 0) + continue; + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job->alloc_memory[j] = job_ptr->details-> + job_min_memory & + (~MEM_PER_CPU); + job->alloc_memory[j] *= job->alloc_cpus[j]; + } else { + job->alloc_memory[j] = job_ptr->details-> + job_min_memory; + } + j++; + } + } + _append_to_job_list(job); last_cr_update_time = time(NULL); diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 28c8ddc8ea3a3dfeded36e844e115b2a0255ecac..787580d21aedceffd8d0a5bef83773dcb62dd304 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -2,8 +2,6 @@ * select_linear.c - node selection plugin for simple one-dimensional * address space. Selects nodes for a job so as to minimize the number * of sets of consecutive nodes using a best-fit algorithm. - * - * $Id: select_linear.c 14103 2008-05-21 20:31:22Z jette $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. 
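Both consumable-resource plugins patched here reuse the 32-bit job_min_memory field for the new --mem-per-cpu limit: the MEM_PER_CPU flag bit marks the value as megabytes per CPU rather than per node. A compact sketch of the decode step (the flag value below is only a stand-in for SLURM's constant):

    #include <stdint.h>

    #define MEM_PER_CPU 0x80000000u   /* stand-in flag bit: when set, the low
                                         bits are MB per CPU, not MB per node */

    /* Memory (MB) the job would occupy on a node with `node_cpus` CPUs. */
    static uint32_t job_mem_on_node(uint32_t job_min_memory, uint16_t node_cpus)
    {
        if (job_min_memory & MEM_PER_CPU)
            return (job_min_memory & ~MEM_PER_CPU) * node_cpus; /* per-CPU  */
        return job_min_memory;                                  /* per-node */
    }

    /* Converse used by _get_task_count(): how many CPUs fit into the node's
     * free memory when the limit is per CPU. */
    static uint32_t max_cpus_by_memory(uint32_t job_min_memory, uint32_t free_mem)
    {
        uint32_t per_cpu = job_min_memory & ~MEM_PER_CPU;
        if (!(job_min_memory & MEM_PER_CPU) || per_cpu == 0)
            return UINT32_MAX;        /* no per-CPU memory cap applies */
        return free_mem / per_cpu;
    }

This is also why _verify_node_state() now masks the flag off (min_mem = job_min_memory & ~MEM_PER_CPU) before comparing against a node's free memory.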
* Copyright (C) 2008 Lawrence Livermore National Security. @@ -559,7 +557,7 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr, { int i, count = 0, total_jobs, total_run_jobs; struct part_cr_record *part_cr_ptr; - uint32_t job_memory = 0; + uint32_t job_memory_cpu = 0, job_memory_node = 0; bool exclusive; xassert(node_cr_ptr); @@ -572,24 +570,42 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr, else exclusive = true; - if (job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; + if (job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else + job_memory_node = job_ptr->details->job_min_memory; + } for (i = 0; i < node_record_count; i++) { if (!bit_test(bitmap, i)) { bit_clear(jobmap, i); continue; } - - if (select_fast_schedule) { - if ((node_cr_ptr[i].alloc_memory + job_memory) > - node_record_table_ptr[i].config_ptr->real_memory) { - bit_clear(jobmap, i); - continue; + if (job_memory_cpu || job_memory_node) { + uint32_t alloc_mem, job_mem, avail_mem; + alloc_mem = node_cr_ptr[i].alloc_memory; + if (select_fast_schedule) { + avail_mem = node_record_table_ptr[i]. + config_ptr->real_memory; + if (job_memory_cpu) { + job_mem = job_memory_cpu * + node_record_table_ptr[i]. + config_ptr->cpus; + } else + job_mem = job_memory_node; + } else { + avail_mem = node_record_table_ptr[i]. + real_memory; + if (job_memory_cpu) { + job_mem = job_memory_cpu * + node_record_table_ptr[i]. + cpus; + } else + job_mem = job_memory_node; } - } else { - if ((node_cr_ptr[i].alloc_memory + job_memory) > - node_record_table_ptr[i].real_memory) { + if ((alloc_mem + job_mem) >avail_mem) { bit_clear(jobmap, i); continue; } @@ -1132,7 +1148,7 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr, { int i, rc = SLURM_SUCCESS; struct part_cr_record *part_cr_ptr; - uint32_t job_memory = 0; + uint32_t job_memory, job_memory_cpu = 0, job_memory_node = 0; if (node_cr_ptr == NULL) { error("%s: node_cr_ptr not initialized", pre_err); @@ -1140,12 +1156,27 @@ static int _rm_job_from_nodes(struct node_cr_record *node_cr_ptr, } if (remove_all && job_ptr->details && - job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; + job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else + job_memory_node = job_ptr->details->job_min_memory; + } for (i = 0; i < select_node_cnt; i++) { if (bit_test(job_ptr->node_bitmap, i) == 0) continue; + if (job_memory_cpu == 0) + job_memory = job_memory_node; + else if (select_fast_schedule) { + job_memory = job_memory_cpu * + node_record_table_ptr[i]. 
+ config_ptr->cpus; + } else { + job_memory = job_memory_cpu * + node_record_table_ptr[i].cpus; + } if (node_cr_ptr[i].alloc_memory >= job_memory) node_cr_ptr[i].alloc_memory -= job_memory; else { @@ -1208,7 +1239,7 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr, { int i, rc = SLURM_SUCCESS, exclusive = 0; struct part_cr_record *part_cr_ptr; - uint32_t job_memory = 0; + uint32_t job_memory_cpu = 0, job_memory_node = 0; if (node_cr_ptr == NULL) { error("%s: node_cr_ptr not initialized", pre_err); @@ -1216,15 +1247,32 @@ static int _add_job_to_nodes(struct node_cr_record *node_cr_ptr, } if (alloc_all && job_ptr->details && - job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; + job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else + job_memory_node = job_ptr->details->job_min_memory; + } + if (job_ptr->details->shared == 0) exclusive = 1; for (i = 0; i < select_node_cnt; i++) { if (bit_test(job_ptr->node_bitmap, i) == 0) continue; - node_cr_ptr[i].alloc_memory += job_memory; + if (job_memory_cpu == 0) + node_cr_ptr[i].alloc_memory += job_memory_node; + else if (select_fast_schedule) { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i]. + config_ptr->cpus; + } else { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i].cpus; + } if (exclusive) { if (node_cr_ptr[i].exclusive_jobid) { error("select/linear: conflicting exclusive " @@ -1341,7 +1389,7 @@ static void _init_node_cr(void) ListIterator part_iterator; struct job_record *job_ptr; ListIterator job_iterator; - uint32_t job_memory, step_mem; + uint32_t job_memory_cpu, job_memory_node, step_mem = 0; int exclusive, i, node_inx; ListIterator step_iterator; struct step_record *step_ptr; @@ -1375,11 +1423,17 @@ static void _init_node_cr(void) (job_ptr->job_state != JOB_SUSPENDED)) continue; + job_memory_cpu = 0; + job_memory_node = 0; if (job_ptr->details && - job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) - job_memory = job_ptr->details->job_min_memory; - else - job_memory = 0; + job_ptr->details->job_min_memory && (cr_type == CR_MEMORY)) { + if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + job_memory_cpu = job_ptr->details->job_min_memory & + (~MEM_PER_CPU); + } else { + job_memory_node = job_ptr->details->job_min_memory; + } + } if (job_ptr->details->shared == 0) exclusive = 1; else @@ -1400,7 +1454,18 @@ static void _init_node_cr(void) } node_cr_ptr[i].exclusive_jobid = job_ptr->job_id; } - node_cr_ptr[i].alloc_memory += job_memory; + if (job_memory_cpu == 0) + node_cr_ptr[i].alloc_memory += job_memory_node; + else if (select_fast_schedule) { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i]. 
+ config_ptr->cpus; + } else { + node_cr_ptr[i].alloc_memory += + job_memory_cpu * + node_record_table_ptr[i].cpus; + } part_cr_ptr = node_cr_ptr[i].parts; while (part_cr_ptr) { if (part_cr_ptr->part_ptr != job_ptr->part_ptr) { diff --git a/src/sacct/Makefile.am b/src/sacct/Makefile.am index 4b25bcca7f72021aded7dfbdacc15504a90165a1..c51d734350da5744ededafa052bfec953e581fc1 100644 --- a/src/sacct/Makefile.am +++ b/src/sacct/Makefile.am @@ -1,13 +1,15 @@ # Makefile for sacct AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) bin_PROGRAMS = sacct -sacct_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la +sacct_LDADD = $(top_builddir)/src/api/libslurm.o -ldl + +sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) noinst_HEADERS = sacct.c sacct_SOURCES = sacct.c process.c print.c options.c sacct_stat.c @@ -16,5 +18,4 @@ force: $(sacct_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` -sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/sacct/Makefile.in b/src/sacct/Makefile.in index c3a7b5a9bf0ee3e8cbd978b3cfad78e880a935f5..5f7bea047f63c55fba41823e84074bd35a010cdd 100644 --- a/src/sacct/Makefile.in +++ b/src/sacct/Makefile.in @@ -75,8 +75,7 @@ PROGRAMS = $(bin_PROGRAMS) am_sacct_OBJECTS = sacct.$(OBJEXT) process.$(OBJEXT) print.$(OBJEXT) \ options.$(OBJEXT) sacct_stat.$(OBJEXT) sacct_OBJECTS = $(am_sacct_OBJECTS) -sacct_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la +sacct_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sacct_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sacct_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -266,13 +265,12 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) -sacct_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la - +sacct_LDADD = $(top_builddir)/src/api/libslurm.o -ldl +sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) noinst_HEADERS = sacct.c sacct_SOURCES = sacct.c process.c print.c options.c sacct_stat.c -sacct_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) all: all-am .SUFFIXES: @@ -474,6 +472,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/sacct/options.c b/src/sacct/options.c index 4f0a729906c1b8dcb5180bde9fe200d9fa0a45f3..8a924a70eeef73f201ee605d72e78a98e267335f 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -38,6 +38,7 @@ \*****************************************************************************/ #include "src/common/read_config.h" +#include "src/common/parse_time.h" #include "sacct.h" #include <time.h> @@ -76,6 +77,313 @@ void _help_fields_msg(void) return; } +static char *_convert_to_id(char *name, bool gid) +{ + if(gid) { + struct group *grp; + if (!(grp=getgrnam(name))) { + fprintf(stderr, "Invalid group id: %s\n", name); + exit(1); + } + xfree(name); + name = xstrdup_printf("%d", grp->gr_gid); + } else { + struct passwd *pwd; + if (!(pwd=getpwnam(name))) { + fprintf(stderr, "Invalid user id: %s\n", name); + exit(1); + } + xfree(name); + name = xstrdup_printf("%d", pwd->pw_uid); + } + return name; +} + +/* returns number of objects added to list */ +static int _addto_id_char_list(List char_list, char *names, bool gid) +{ + int i=0, start=0; + 
char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + //info("got %s %d", name, i-start); + if (!isdigit((int) *name)) { + name = _convert_to_id( + name, gid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + if (!isdigit((int) *name)) { + name = _convert_to_id(name, gid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} + +/* returns number of objects added to list */ +static int _addto_state_char_list(List char_list, char *names) +{ + int i=0, start=0, c; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + c = decode_state_char(name); + if (c == -1) + fatal("unrecognized job " + "state value"); + xfree(name); + name = xstrdup_printf("%d", c); + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. 
It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + c = decode_state_char(name); + if (c == -1) + fatal("unrecognized job state value"); + xfree(name); + name = xstrdup_printf("%d", c); + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} + +/* returns number of objects added to list */ +static int _addto_job_list(List job_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *dot = NULL; + jobacct_selected_step_t *selected_step = NULL; + jobacct_selected_step_t *curr_step = NULL; + + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!job_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(job_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + char *dot = NULL; + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + + selected_step = xmalloc( + sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = + atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid + == selected_step->jobid) + && (curr_step->stepid + == selected_step-> + stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, + selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + list_iterator_reset(itr); + } + i++; + start = i; + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + selected_step = + xmalloc(sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid == selected_step->jobid) + && (curr_step->stepid + == selected_step->stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + } + } + list_iterator_destroy(itr); + return count; +} + void _help_msg(void) { slurm_ctl_conf_t *conf = slurm_conf_lock(); @@ -203,47 +511,26 @@ void _usage(void) void _init_params() { - params.opt_cluster = slurm_get_cluster_name(); /* --cluster */ - params.opt_completion = 0; /* --completion */ - params.opt_dump = 0; /* --dump */ - params.opt_dup = -1; /* --duplicates; +1 = explicitly set */ - params.opt_fdump = 0; /* --formattted_dump */ - params.opt_stat = 0; /* --stat */ - params.opt_gid = -1; /* --gid (-1=wildcard, 0=root) */ - params.opt_header = 1; /* can only be cleared */ - params.opt_help = 0; /* --help */ - params.opt_long = 0; /* --long */ - params.opt_lowmem = 0; /* --low_memory */ - params.opt_purge = 0; /* --purge */ - params.opt_total = 
0; /* --total */ - params.opt_uid = -1; /* --uid (-1=wildcard, 0=root) */ - params.opt_uid_set = 0; - params.opt_verbose = 0; /* --verbose */ - params.opt_expire_timespec = NULL; /* --expire= */ - params.opt_field_list = NULL; /* --fields= */ - params.opt_filein = NULL; /* --file */ - params.opt_job_list = NULL; /* --jobs */ - params.opt_partition_list = NULL;/* --partitions */ - params.opt_state_list = NULL; /* --states */ + memset(¶ms, 0, sizeof(sacct_parameters_t)); } int decode_state_char(char *state) { - if (!strcasecmp(state, "p")) + if (!strncasecmp(state, "p", 1)) return JOB_PENDING; /* we should never see this */ - else if (!strcasecmp(state, "r")) + else if (!strncasecmp(state, "r", 1)) return JOB_RUNNING; - else if (!strcasecmp(state, "su")) + else if (!strncasecmp(state, "su", 1)) return JOB_SUSPENDED; - else if (!strcasecmp(state, "cd")) + else if (!strncasecmp(state, "cd", 2)) return JOB_COMPLETE; - else if (!strcasecmp(state, "ca")) + else if (!strncasecmp(state, "ca", 2)) return JOB_CANCELLED; - else if (!strcasecmp(state, "f")) + else if (!strncasecmp(state, "f", 1)) return JOB_FAILED; - else if (!strcasecmp(state, "to")) + else if (!strncasecmp(state, "to", 1)) return JOB_TIMEOUT; - else if (!strcasecmp(state, "nf")) + else if (!strncasecmp(state, "nf", 1)) return JOB_NODE_FAIL; else return -1; // unknown @@ -258,13 +545,26 @@ int get_data(void) ListIterator itr_step = NULL; if(params.opt_completion) { - jobs = g_slurm_jobcomp_get_jobs(selected_steps, - selected_parts, ¶ms); + jobs = g_slurm_jobcomp_get_jobs(params.opt_job_list, + params.opt_partition_list, + ¶ms); return SLURM_SUCCESS; } else { - jobs = jobacct_storage_g_get_jobs(acct_db_conn, - selected_steps, - selected_parts, ¶ms); + acct_job_cond_t *job_cond = xmalloc(sizeof(acct_job_cond_t)); + + job_cond->acct_list = params.opt_acct_list; + job_cond->cluster_list = params.opt_cluster_list; + job_cond->duplicates = params.opt_dup; + job_cond->groupid_list = params.opt_gid_list; + job_cond->partition_list = params.opt_partition_list; + job_cond->step_list = params.opt_job_list; + job_cond->state_list = params.opt_state_list; + job_cond->usage_start = params.opt_begin; + job_cond->usage_end = params.opt_end; + job_cond->userid_list = params.opt_uid_list; + + jobs = jobacct_storage_g_get_jobs_cond(acct_db_conn, job_cond); + destroy_acct_job_cond(job_cond); } if (params.opt_fdump) @@ -321,19 +621,22 @@ void parse_command_line(int argc, char **argv) struct stat stat_buf; char *dot = NULL; bool brief_output = FALSE, long_output = FALSE; + bool all_users = 0; static struct option long_options[] = { {"all", 0,0, 'a'}, + {"accounts", 1, 0, 'A'}, + {"begin", 1, 0, 'B'}, {"brief", 0, 0, 'b'}, {"cluster", 1, 0, 'C'}, {"completion", 0, ¶ms.opt_completion, 'c'}, {"duplicates", 0, ¶ms.opt_dup, 1}, {"dump", 0, 0, 'd'}, + {"end", 1, 0, 'E'}, {"expire", 1, 0, 'e'}, {"fields", 1, 0, 'F'}, {"file", 1, 0, 'f'}, {"formatted_dump", 0, 0, 'O'}, - {"stat", 0, 0, 'S'}, {"gid", 1, 0, 'g'}, {"group", 1, 0, 'g'}, {"help", 0, ¶ms.opt_help, 1}, @@ -342,10 +645,11 @@ void parse_command_line(int argc, char **argv) {"long", 0, 0, 'l'}, {"big_logfile", 0, ¶ms.opt_lowmem, 1}, {"noduplicates", 0, ¶ms.opt_dup, 0}, - {"noheader", 0, ¶ms.opt_header, 0}, + {"noheader", 0, ¶ms.opt_noheader, 1}, {"partition", 1, 0, 'p'}, {"purge", 0, 0, 'P'}, {"state", 1, 0, 's'}, + {"stat", 0, 0, 'S'}, {"total", 0, 0, 't'}, {"uid", 1, 0, 'u'}, {"usage", 0, ¶ms.opt_help, 3}, @@ -356,30 +660,40 @@ void parse_command_line(int argc, char **argv) _init_params(); - if 
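The rewritten sacct option handling accepts comma-separated lists of user and group names and normalizes every entry to a numeric id before it is handed to the new acct_job_cond_t filter (see _convert_to_id() and _addto_id_char_list()). The lookup itself is plain POSIX; a minimal sketch without SLURM's xmalloc/List helpers:

    #include <grp.h>
    #include <pwd.h>

    /* Resolve a user or group name to its numeric id, as _convert_to_id()
     * does before the id is appended to params.opt_uid_list/opt_gid_list.
     * Returns 0 on success, -1 if the name is unknown. */
    static int name_to_id(const char *name, int is_group, long *id)
    {
        if (is_group) {
            struct group *grp = getgrnam(name);
            if (!grp)
                return -1;
            *id = (long) grp->gr_gid;
        } else {
            struct passwd *pwd = getpwnam(name);
            if (!pwd)
                return -1;
            *id = (long) pwd->pw_uid;
        }
        return 0;
    }

As in the patch, entries that already look numeric (the isdigit() check) are passed through untouched, so names and raw ids may be mixed in one --uid/--gid list.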
((i=getuid())) - /* default to current user unless root*/ - params.opt_uid = i; + params.opt_uid = getuid(); + params.opt_gid = getgid(); opterr = 1; /* Let getopt report problems to the user */ while (1) { /* now cycle through the command line */ - c = getopt_long(argc, argv, "abcC:de:F:f:g:hj:J:lOPp:s:StUu:Vv", + c = getopt_long(argc, argv, "aA:bB:cC:de:E:F:f:g:hj:lOPp:s:StUu:Vv", long_options, &optionIndex); if (c == -1) break; switch (c) { case 'a': - params.opt_uid = -1; + all_users = 1; + break; + case 'A': + if(!params.opt_acct_list) + params.opt_acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(params.opt_acct_list, optarg); break; case 'b': brief_output = true; break; + case 'B': + params.opt_begin = parse_time(optarg); + break; case 'c': params.opt_completion = 1; break; case 'C': - xfree(params.opt_cluster); - params.opt_cluster = xstrdup(optarg); + if(!params.opt_cluster_list) + params.opt_cluster_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(params.opt_cluster_list, optarg); break; case 'd': params.opt_dump = 1; @@ -429,9 +743,11 @@ void parse_command_line(int argc, char **argv) exit(1); } } - params.opt_uid = -1; /* fix default; can't purge by uid */ break; - + + case 'E': + params.opt_end = parse_time(optarg); + break; case 'F': if(params.opt_stat) xfree(params.opt_field_list); @@ -452,18 +768,10 @@ void parse_command_line(int argc, char **argv) break; case 'g': - if (isdigit((int) *optarg)) - params.opt_gid = atoi(optarg); - else { - struct group *grp; - if ((grp=getgrnam(optarg))==NULL) { - fprintf(stderr, - "Invalid group id: %s\n", - optarg); - exit(1); - } - params.opt_gid=grp->gr_gid; - } + if(!params.opt_gid_list) + params.opt_gid_list = + list_create(slurm_destroy_char); + _addto_id_char_list(params.opt_gid_list, optarg, 1); break; case 'h': @@ -478,13 +786,11 @@ void parse_command_line(int argc, char **argv) optarg); exit(1); } - params.opt_job_list = - xrealloc(params.opt_job_list, - (params.opt_job_list==NULL? 0 : - strlen(params.opt_job_list)) + - strlen(optarg) + 1); - strcat(params.opt_job_list, optarg); - strcat(params.opt_job_list, ","); + + if(!params.opt_job_list) + params.opt_job_list = list_create( + destroy_jobacct_selected_step); + _addto_job_list(params.opt_job_list, optarg); break; case 'l': @@ -500,25 +806,20 @@ void parse_command_line(int argc, char **argv) break; case 'p': - params.opt_partition_list = - xrealloc(params.opt_partition_list, - (params.opt_partition_list==NULL? 0 : - strlen(params.opt_partition_list)) + - strlen(optarg) + 1); - strcat(params.opt_partition_list, optarg); - strcat(params.opt_partition_list, ","); - break; + if(!params.opt_partition_list) + params.opt_partition_list = + list_create(slurm_destroy_char); - case 's': - params.opt_state_list = - xrealloc(params.opt_state_list, - (params.opt_state_list==NULL? 
0 : - strlen(params.opt_state_list)) + - strlen(optarg) + 1); - strcat(params.opt_state_list, optarg); - strcat(params.opt_state_list, ","); + slurm_addto_char_list(params.opt_partition_list, + optarg); break; + case 's': + if(!params.opt_state_list) + params.opt_state_list = + list_create(slurm_destroy_char); + _addto_state_char_list(params.opt_state_list, optarg); + break; case 'S': if(!params.opt_field_list) { params.opt_field_list = @@ -538,18 +839,10 @@ void parse_command_line(int argc, char **argv) break; case 'u': - if (isdigit((int) *optarg) || atoi(optarg) == -1) - params.opt_uid = atoi(optarg); - else { - struct passwd *pwd; - if ((pwd=getpwnam(optarg))==NULL) { - fprintf(stderr, - "Invalid user id: %s\n", - optarg); - exit(1); - } - params.opt_uid=pwd->pw_uid; - } + if(!params.opt_uid_list) + params.opt_uid_list = + list_create(slurm_destroy_char); + _addto_id_char_list(params.opt_uid_list, optarg, 0); break; case 'v': @@ -584,14 +877,13 @@ void parse_command_line(int argc, char **argv) /* Now set params.opt_dup, unless they've already done so */ if (params.opt_dup < 0) /* not already set explicitly */ - if (params.opt_job_list) - /* They probably want the most recent job N if - * they requested specific jobs or steps. */ - params.opt_dup = 0; + params.opt_dup = 0; + + if (params.opt_fdump) + params.opt_dup |= FDUMP_FLAG; if (params.opt_verbose) { fprintf(stderr, "Options selected:\n" - "\topt_cluster=%s\n" "\topt_completion=%d\n" "\topt_dump=%d\n" "\topt_dup=%d\n" @@ -600,18 +892,13 @@ void parse_command_line(int argc, char **argv) "\topt_stat=%d\n" "\topt_field_list=%s\n" "\topt_filein=%s\n" - "\topt_header=%d\n" + "\topt_noheader=%d\n" "\topt_help=%d\n" - "\topt_job_list=%s\n" "\topt_long=%d\n" "\topt_lowmem=%d\n" - "\topt_partition_list=%s\n" "\topt_purge=%d\n" - "\topt_state_list=%s\n" "\topt_total=%d\n" - "\topt_uid=%d\n" "\topt_verbose=%d\n", - params.opt_cluster, params.opt_completion, params.opt_dump, params.opt_dup, @@ -620,16 +907,12 @@ void parse_command_line(int argc, char **argv) params.opt_stat, params.opt_field_list, params.opt_filein, - params.opt_header, + params.opt_noheader, params.opt_help, - params.opt_job_list, params.opt_long, params.opt_lowmem, - params.opt_partition_list, params.opt_purge, - params.opt_state_list, params.opt_total, - params.opt_uid, params.opt_verbose); } @@ -665,98 +948,101 @@ void parse_command_line(int argc, char **argv) xfree(acct_type); } - /* specific partitions requested? */ - if (params.opt_partition_list) { - - start = params.opt_partition_list; - while ((end = strstr(start, ",")) && start) { - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - acct_type = xstrdup(start); - list_append(selected_parts, acct_type); - start = end + 1; + /* specific clusters requested? 
*/ + if (params.opt_verbose && params.opt_cluster_list + && list_count(params.opt_cluster_list)) { + fprintf(stderr, "Clusters requested:\n"); + itr = list_iterator_create(params.opt_cluster_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } else if(!params.opt_cluster_list + || !list_count(params.opt_cluster_list)) { + if(!params.opt_cluster_list) + params.opt_cluster_list = + list_create(slurm_destroy_char); + if((start = slurm_get_cluster_name())) + list_append(params.opt_cluster_list, start); + if(params.opt_verbose) { + fprintf(stderr, "Clusters requested:\n"); + fprintf(stderr, "\t: %s\n", start); } - if (params.opt_verbose) { - fprintf(stderr, "Partitions requested:\n"); - itr = list_iterator_create(selected_parts); - while((start = list_next(itr))) - fprintf(stderr, "\t: %s\n", start); - list_iterator_destroy(itr); + } + + if(all_users) { + if(params.opt_uid_list + && list_count(params.opt_uid_list)) { + list_destroy(params.opt_uid_list); + params.opt_uid_list = NULL; + } + if(params.opt_verbose) + fprintf(stderr, "Userids requested:\n\t: all\n"); + } else if (params.opt_verbose && params.opt_uid_list + && list_count(params.opt_uid_list)) { + fprintf(stderr, "Userids requested:\n"); + itr = list_iterator_create(params.opt_uid_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } else if(!params.opt_uid_list + || !list_count(params.opt_uid_list)) { + if(!params.opt_uid_list) + params.opt_uid_list = + list_create(slurm_destroy_char); + start = xstrdup_printf("%u", params.opt_uid); + list_append(params.opt_uid_list, start); + if(params.opt_verbose) { + fprintf(stderr, "Userids requested:\n"); + fprintf(stderr, "\t: %s\n", start); } } + if (params.opt_verbose && params.opt_gid_list + && list_count(params.opt_gid_list)) { + fprintf(stderr, "Groupids requested:\n"); + itr = list_iterator_create(params.opt_gid_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } + + /* specific partitions requested? */ + if (params.opt_verbose && params.opt_partition_list + && list_count(params.opt_partition_list)) { + fprintf(stderr, "Partitions requested:\n"); + itr = list_iterator_create(params.opt_partition_list); + while((start = list_next(itr))) + fprintf(stderr, "\t: %s\n", start); + list_iterator_destroy(itr); + } + /* specific jobs requested? 
*/ - if (params.opt_job_list) { - start = params.opt_job_list; - while ((end = strstr(start, ",")) && start) { - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - selected_step = - xmalloc(sizeof(jobacct_selected_step_t)); - list_append(selected_steps, selected_step); - - dot = strstr(start, "."); - if (dot == NULL) { - debug2("No jobstep requested"); - selected_step->step = NULL; - selected_step->stepid = (uint32_t)NO_VAL; - } else { - *dot++ = 0; - selected_step->step = xstrdup(dot); - selected_step->stepid = atoi(dot); - } - selected_step->job = xstrdup(start); - selected_step->jobid = atoi(start); - start = end + 1; - } - if (params.opt_verbose) { - fprintf(stderr, "Jobs requested:\n"); - itr = list_iterator_create(selected_steps); - while((selected_step = list_next(itr))) { - if(selected_step->step) - fprintf(stderr, "\t: %s.%s\n", - selected_step->job, - selected_step->step); - else - fprintf(stderr, "\t: %s\n", - selected_step->job); - } - list_iterator_destroy(itr); + if (params.opt_verbose && params.opt_job_list + && list_count(params.opt_job_list)) { + fprintf(stderr, "Jobs requested:\n"); + itr = list_iterator_create(params.opt_job_list); + while((selected_step = list_next(itr))) { + if(selected_step->stepid != NO_VAL) + fprintf(stderr, "\t: %d.%d\n", + selected_step->jobid, + selected_step->stepid); + else + fprintf(stderr, "\t: %d\n", + selected_step->jobid); } + list_iterator_destroy(itr); } /* specific states (completion state) requested? */ - if (params.opt_state_list) { - start = params.opt_state_list; - while ((end = strstr(start, ",")) && start) { - int c; - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - c = decode_state_char(start); - if (c == -1) - fatal("unrecognized job state value"); - selected_state[c] = 1; - start = end + 1; - } - if (params.opt_verbose) { - fprintf(stderr, "States requested:\n"); - for(i=0; i< STATE_COUNT; i++) { - if(selected_state[i]) { - fprintf(stderr, "\t: %s\n", - job_state_string(i)); - break; - } - } + if (params.opt_verbose && params.opt_state_list + && list_count(params.opt_state_list)) { + fprintf(stderr, "States requested:\n"); + itr = list_iterator_create(params.opt_state_list); + while((start = list_next(itr))) { + fprintf(stderr, "\t: %s\n", + job_state_string(atoi(start))); } + list_iterator_destroy(itr); } /* select the output fields */ @@ -1054,18 +1340,13 @@ void do_dump_completion(void) /* do_expire() -- purge expired data from the accounting log file */ -void do_expire(int dummy) +void do_expire() { - if (dummy == NO_VAL) { - /* just load the symbol, don't want to execute */ - slurm_reconfigure(); - } - if(params.opt_completion) - g_slurm_jobcomp_archive(selected_parts, ¶ms); + g_slurm_jobcomp_archive(params.opt_partition_list, ¶ms); else jobacct_storage_g_archive(acct_db_conn, - selected_parts, ¶ms); + params.opt_partition_list, ¶ms); } void do_help(void) @@ -1107,13 +1388,6 @@ void do_list(void) do_jobsteps = 0; itr = list_iterator_create(jobs); while((job = list_next(itr))) { - /* This is really handled when we got the data except - for the filetxt plugin so keep it here. 
- */ - if (params.opt_uid >= 0 && (job->uid != params.opt_uid)) - continue; - if (params.opt_gid >= 0 && (job->gid != params.opt_gid)) - continue; if(job->sacct.min_cpu == NO_VAL) job->sacct.min_cpu = 0; @@ -1125,20 +1399,12 @@ void do_list(void) } if (job->show_full) { - if (params.opt_state_list) { - if(!selected_state[job->state]) - continue; - } print_fields(JOB, job); } if (do_jobsteps && (job->track_steps || !job->show_full)) { itr_step = list_iterator_create(job->steps); while((step = list_next(itr_step))) { - if (params.opt_state_list) { - if(!selected_state[step->state]) - continue; - } if(step->end == 0) step->end = job->end; step->account = job->account; @@ -1165,10 +1431,6 @@ void do_list_completion(void) itr = list_iterator_create(jobs); while((job = list_next(itr))) { - if (params.opt_uid >= 0 && (job->uid != params.opt_uid)) - continue; - if (params.opt_gid >= 0 && (job->gid != params.opt_gid)) - continue; print_fields(JOBCOMP, job); } list_iterator_destroy(itr); @@ -1177,36 +1439,33 @@ void do_list_completion(void) void do_stat() { ListIterator itr = NULL; - uint32_t jobid = 0; uint32_t stepid = 0; jobacct_selected_step_t *selected_step = NULL; - - itr = list_iterator_create(selected_steps); + + if(!params.opt_job_list || !list_count(params.opt_job_list)) { + fprintf(stderr, "No job list given to stat.\n"); + return; + } + + itr = list_iterator_create(params.opt_job_list); while((selected_step = list_next(itr))) { - jobid = atoi(selected_step->job); - if(selected_step->step) - stepid = atoi(selected_step->step); + if(selected_step->stepid != NO_VAL) + stepid = selected_step->stepid; else stepid = 0; - sacct_stat(jobid, stepid); + sacct_stat(selected_step->jobid, stepid); } list_iterator_destroy(itr); } + void sacct_init() { - int i=0; - selected_parts = list_create(slurm_destroy_char); - selected_steps = list_create(destroy_jobacct_selected_step); - for(i=0; i<STATE_COUNT; i++) - selected_state[i] = 0; } void sacct_fini() { if(jobs) list_destroy(jobs); - list_destroy(selected_parts); - list_destroy(selected_steps); if(params.opt_completion) g_slurm_jobcomp_fini(); else { diff --git a/src/sacct/sacct.c b/src/sacct/sacct.c index 79c6981c3bca7000053ea8a84c02f3b71ad63fea..f1b968ddaab95f299f31df9c8c67e98b124d0e2d 100644 --- a/src/sacct/sacct.c +++ b/src/sacct/sacct.c @@ -257,16 +257,12 @@ int main(int argc, char **argv) "\topt_total=%d\n" "\topt_field_list=%s\n" "\topt_gid=%d\n" - "\topt_uid=%d\n" - "\topt_job_list=%s\n" - "\topt_state_list=%s\n", + "\topt_uid=%d\n", params.opt_long, params.opt_total, params.opt_field_list, params.opt_gid, - params.opt_uid, - params.opt_job_list, - params.opt_state_list); + params.opt_uid); invalidSwitchCombo("--expire", "--brief, --long, --fields, " "--total, --gid, --uid, --jobs, " @@ -288,14 +284,14 @@ int main(int argc, char **argv) do_dump(); break; case SACCT_EXPIRE: - do_expire(0); + do_expire(); break; case SACCT_FDUMP: if(get_data() == SLURM_ERROR) exit(errno); break; case SACCT_LIST: - if (params.opt_header) /* give them something to look */ + if (!params.opt_noheader)/* give them something to look */ _print_header();/* at while we think... */ if(get_data() == SLURM_ERROR) exit(errno); @@ -310,7 +306,7 @@ int main(int argc, char **argv) "in the future please make note this will " "not be supported.\n"); - if (params.opt_header) /* give them something to look */ + if (!params.opt_noheader)/* give them something to look */ _print_header();/* at while we think... 
*/ do_stat(); break; diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index c340bc9eeaa06877c1d729ebc2674e00a3d486a6..23966f2d2520ac0bacf635d9f9677c2b5ab7d8bd 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -159,7 +159,7 @@ int get_data(void); void parse_command_line(int argc, char **argv); void do_dump(void); void do_dump_completion(void); -void do_expire(int dummy); +void do_expire(); void do_help(void); void do_list(void); void do_list_completion(void); diff --git a/src/sacctmgr/Makefile.am b/src/sacctmgr/Makefile.am index 8ff241fbb587069055029d643cf27ce76280be55..800497fbfdb213753a92602d88c5523b71075e86 100644 --- a/src/sacctmgr/Makefile.am +++ b/src/sacctmgr/Makefile.am @@ -1,11 +1,16 @@ # Makefile for sacctmgr AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) bin_PROGRAMS = sacctmgr +sacctmgr_LDADD = \ + $(top_builddir)/src/api/libslurm.o -ldl\ + $(READLINE_LIBS) + sacctmgr_SOURCES = \ account_functions.c \ association_functions.c \ @@ -13,15 +18,12 @@ sacctmgr_SOURCES = \ common.c \ file_functions.c \ sacctmgr.c sacctmgr.h \ + qos_functions.c \ + txn_functions.c \ user_functions.c -sacctmgr_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ - $(READLINE_LIBS) - sacctmgr_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) force: -$(convenience_libs) : force +$(sacctmgr_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/sacctmgr/Makefile.in b/src/sacctmgr/Makefile.in index 1a61db6f7dde9714520190e2acc11553ebc8833d..c909565315361cf363bd558e87a1aa77f1181d98 100644 --- a/src/sacctmgr/Makefile.in +++ b/src/sacctmgr/Makefile.in @@ -73,11 +73,11 @@ PROGRAMS = $(bin_PROGRAMS) am_sacctmgr_OBJECTS = account_functions.$(OBJEXT) \ association_functions.$(OBJEXT) cluster_functions.$(OBJEXT) \ common.$(OBJEXT) file_functions.$(OBJEXT) sacctmgr.$(OBJEXT) \ + qos_functions.$(OBJEXT) txn_functions.$(OBJEXT) \ user_functions.$(OBJEXT) sacctmgr_OBJECTS = $(am_sacctmgr_OBJECTS) am__DEPENDENCIES_1 = -sacctmgr_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la \ +sacctmgr_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \ $(am__DEPENDENCIES_1) sacctmgr_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sacctmgr_LDFLAGS) \ @@ -267,7 +267,12 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) +sacctmgr_LDADD = \ + $(top_builddir)/src/api/libslurm.o -ldl\ + $(READLINE_LIBS) + sacctmgr_SOURCES = \ account_functions.c \ association_functions.c \ @@ -275,13 +280,10 @@ sacctmgr_SOURCES = \ common.c \ file_functions.c \ sacctmgr.c sacctmgr.h \ + qos_functions.c \ + txn_functions.c \ user_functions.c -sacctmgr_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ - $(READLINE_LIBS) - sacctmgr_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) all: all-am @@ -359,7 +361,9 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cluster_functions.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/common.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/file_functions.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/qos_functions.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sacctmgr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/txn_functions.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/user_functions.Po@am__quote@ .c.o: @@ -486,6 +490,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) @@ -569,7 +574,7 @@ uninstall-am: uninstall-binPROGRAMS force: -$(convenience_libs) : force +$(sacctmgr_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index 43df7c633d120743e6488820b7f726e3a047d2ea..6a83719b581e33ee8528ff72dd3c450cb7a425f9 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -46,57 +46,104 @@ static int _set_cond(int *start, int argc, char *argv[], int a_set = 0; int u_set = 0; int end = 0; + List qos_list = NULL; + + if(!acct_cond) { + exit_code=1; + fprintf(stderr, "No acct_cond given"); + return -1; + } + + if(!acct_cond->assoc_cond) { + acct_cond->assoc_cond = + xmalloc(sizeof(acct_association_cond_t)); + acct_cond->assoc_cond->fairshare = NO_VAL; + acct_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; + acct_cond->assoc_cond->max_jobs = NO_VAL; + acct_cond->assoc_cond->max_nodes_per_job = NO_VAL; + acct_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; + } for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; - } else if (strncasecmp (argv[i], "WithAssoc", 5) == 0) { + } else if (!strncasecmp (argv[i], "WithAssoc", 5)) { acct_cond->with_assocs = 1; - } else if (strncasecmp (argv[i], "WithCoordinators", 5) == 0) { + } else if (!strncasecmp (argv[i], "WithCoordinators", 5)) { acct_cond->with_coords = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(acct_cond->acct_list, argv[i]); - addto_char_list(acct_cond->assoc_cond->acct_list, - argv[i]); - u_set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(acct_cond->assoc_cond->cluster_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "Descriptions", 1) == 0) { - addto_char_list(acct_cond->description_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if(!end + || !strncasecmp (argv[i], "Names", 1) + || !strncasecmp (argv[i], "Accouts", 1)) { + if(!acct_cond->assoc_cond->acct_list) { + acct_cond->assoc_cond->acct_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + acct_cond->assoc_cond->acct_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + if(!acct_cond->assoc_cond->cluster_list) { + acct_cond->assoc_cond->cluster_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + acct_cond->assoc_cond->cluster_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "Descriptions", 1)) { + if(!acct_cond->description_list) { + acct_cond->description_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(acct_cond->description_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0 - || strncasecmp (argv[i], "Accouts", 1) == 0) { - addto_char_list(acct_cond->acct_list, 
argv[i]+end); - addto_char_list(acct_cond->assoc_cond->acct_list, - argv[i]); - u_set = 1; - } else if (strncasecmp (argv[i], "Organizations", 1) == 0) { - addto_char_list(acct_cond->organization_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Parent", 1) == 0) { + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Organizations", 1)) { + if(!acct_cond->organization_list) { + acct_cond->organization_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(acct_cond->organization_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Parent", 1)) { acct_cond->assoc_cond->parent_acct = strip_quotes(argv[i]+end, NULL); a_set = 1; - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - acct_cond->qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!acct_cond->qos_list) { + acct_cond->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + addto_qos_char_list(acct_cond->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" " Use keyword 'set' to modify " "SLURM_PRINT_VALUE\n", argv[i]); } } + + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(a_set) @@ -115,61 +162,86 @@ static int _set_rec(int *start, int argc, char *argv[], int u_set = 0; int a_set = 0; int end = 0; + List qos_list = NULL; for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i--; break; } else if(!end && !strncasecmp(argv[i], "set", 3)) { continue; } else if(!end) { - printf(" Bad format on %s: End your option with " + exit_code=1; + fprintf(stderr, + " Bad format on %s: End your option with " "an '=' sign\n", argv[i]); - } else if (strncasecmp (argv[i], "Description", 1) == 0) { + } else if (!strncasecmp (argv[i], "Description", 1)) { acct->description = strip_quotes(argv[i]+end, NULL); u_set = 1; - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &assoc->fairshare, - "FairShare") == SLURM_SUCCESS) + "FairShare") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSec", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSec", 4)) { if (get_uint(argv[i]+end, &assoc->max_cpu_secs_per_job, - "MaxCPUSec") == SLURM_SUCCESS) + "MaxCPUSec") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &assoc->max_jobs, - "MaxJobs") == SLURM_SUCCESS) + "MaxJobs") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &assoc->max_nodes_per_job, - "MaxNodes") == SLURM_SUCCESS) + "MaxNodes") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { assoc->max_wall_duration_per_job = (uint32_t) mins; a_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Organization", 1) == 0) { + } else if (!strncasecmp (argv[i], 
"Organization", 1)) { acct->organization = strip_quotes(argv[i]+end, NULL); u_set = 1; - } else if (strncasecmp (argv[i], "Parent", 1) == 0) { + } else if (!strncasecmp (argv[i], "Parent", 1)) { assoc->parent_acct = strip_quotes(argv[i]+end, NULL); a_set = 1; - } else if (strncasecmp (argv[i], "QosLevel=", 1) == 0) { - acct->qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel=", 1)) { + int option = 0; + if(!acct->qos_list) { + acct->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + if(end > 2 && argv[i][end-1] == '=' + && (argv[i][end-2] == '+' + || argv[i][end-2] == '-')) + option = (int)argv[i][end-2]; + + addto_qos_char_list(acct->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown option: %s\n" - " Use keyword 'where' to modify condition\n", - argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n" + " Use keyword 'where' to modify condition\n", + argv[i]); } } + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(u_set && a_set) @@ -196,7 +268,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) char *parent = NULL; char *cluster = NULL; char *name = NULL; - acct_qos_level_t qos = ACCT_QOS_NOTSET; + List add_qos_list = NULL; + List qos_list = NULL; List acct_list = NULL; List assoc_list = NULL; List local_assoc_list = NULL; @@ -213,46 +286,60 @@ extern int sacctmgr_add_account(int argc, char *argv[]) for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); if(!end) { - addto_char_list(name_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Cluster", 1) == 0) { - addto_char_list(cluster_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Description", 1) == 0) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Cluster", 1)) { + slurm_addto_char_list(cluster_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Description", 1)) { description = strip_quotes(argv[i]+end, NULL); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &fairshare, "FairShare") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { if (get_uint(argv[i]+end, &max_cpu_secs_per_job, "MaxCPUSecs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &max_jobs, "MaxJobs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { max_wall_duration_per_job = (uint32_t) mins; limit_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(name_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Organization", 1) == 0) { + } else if (!strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Organization", 1)) { organization = strip_quotes(argv[i]+end, 
NULL); - } else if (strncasecmp (argv[i], "Parent", 1) == 0) { + } else if (!strncasecmp (argv[i], "Parent", 1)) { parent = strip_quotes(argv[i]+end, NULL); - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!add_qos_list) { + add_qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + addto_qos_char_list(add_qos_list, qos_list, + argv[i]+end, option); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } @@ -262,21 +349,26 @@ extern int sacctmgr_add_account(int argc, char *argv[]) xfree(parent); xfree(description); xfree(organization); - printf(" Need name of account to add.\n"); + exit_code=1; + fprintf(stderr, " Need name of account to add.\n"); return SLURM_SUCCESS; } else { acct_account_cond_t account_cond; + acct_association_cond_t assoc_cond; memset(&account_cond, 0, sizeof(acct_account_cond_t)); - account_cond.acct_list = name_list; + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.acct_list = name_list; + account_cond.assoc_cond = &assoc_cond; local_account_list = acct_storage_g_get_accounts( db_conn, &account_cond); } if(!local_account_list) { - printf(" Problem getting accounts from database. " - "Contact your admin.\n"); + exit_code=1; + fprintf(stderr, " Problem getting accounts from database. " + "Contact your admin.\n"); list_destroy(name_list); list_destroy(cluster_list); xfree(parent); @@ -294,7 +386,9 @@ extern int sacctmgr_add_account(int argc, char *argv[]) temp_list = acct_storage_g_get_clusters(db_conn, NULL); if(!cluster_list) { - printf(" Problem getting clusters from database. " + exit_code=1; + fprintf(stderr, + " Problem getting clusters from database. " "Contact your admin.\n"); list_destroy(name_list); list_destroy(cluster_list); @@ -312,8 +406,11 @@ extern int sacctmgr_add_account(int argc, char *argv[]) list_iterator_destroy(itr_c); if(!list_count(cluster_list)) { - printf(" Can't add accounts, no cluster defined yet.\n" - " Please contact your administrator.\n"); + exit_code=1; + fprintf(stderr, + " Can't add accounts, no cluster " + "defined yet.\n" + " Please contact your administrator.\n"); list_destroy(name_list); list_destroy(cluster_list); list_destroy(local_account_list); @@ -342,7 +439,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) break; } if(!cluster_rec) { - printf(" error: This cluster '%s' " + exit_code=1; + fprintf(stderr, " This cluster '%s' " "doesn't exist.\n" " Contact your admin " "to add it to accounting.\n", @@ -380,7 +478,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) db_conn, &assoc_cond); list_destroy(assoc_cond.acct_list); if(!local_assoc_list) { - printf(" Problem getting associations from database. " + exit_code=1; + fprintf(stderr, " Problem getting associations from database. 
" "Contact your admin.\n"); list_destroy(name_list); list_destroy(cluster_list); @@ -409,8 +508,18 @@ extern int sacctmgr_add_account(int argc, char *argv[]) acct->organization = xstrdup(parent); else acct->organization = xstrdup(name); - - acct->qos = qos; + if(add_qos_list && list_count(add_qos_list)) { + char *tmp_qos = NULL; + ListIterator qos_itr = + list_iterator_create(add_qos_list); + acct->qos_list = + list_create(slurm_destroy_char); + while((tmp_qos = list_next(qos_itr))) { + list_append(acct->qos_list, + xstrdup(tmp_qos)); + } + list_iterator_destroy(qos_itr); + } xstrfmtcat(acct_str, " %s\n", name); list_append(acct_list, acct); } @@ -424,7 +533,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) } if(!sacctmgr_find_account_base_assoc_from_list( local_assoc_list, parent, cluster)) { - printf(" error: Parent account '%s' " + exit_code=1; + fprintf(stderr, " Parent account '%s' " "doesn't exist on " "cluster %s\n" " Contact your admin " @@ -467,7 +577,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) printf(" Nothing new added.\n"); goto end_it; } else if(!assoc_str) { - printf(" Error: no associations created.\n"); + exit_code=1; + fprintf(stderr, " No associations created.\n"); goto end_it; } @@ -485,8 +596,14 @@ extern int sacctmgr_add_account(int argc, char *argv[]) printf(" Organization = %s\n", "Parent/Account Name"); - if(qos != ACCT_QOS_NOTSET) - printf(" Qos = %s\n", acct_qos_str(qos)); + if(add_qos_list) { + char *temp_char = get_qos_complete_str( + qos_list, add_qos_list); + if(temp_char) { + printf(" Qos = %s\n", temp_char); + xfree(temp_char); + } + } xfree(acct_str); } @@ -538,7 +655,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) rc = acct_storage_g_add_associations(db_conn, my_uid, assoc_list); } else { - printf(" error: Problem adding accounts\n"); + exit_code=1; + fprintf(stderr, " Problem adding accounts\n"); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; @@ -553,11 +671,15 @@ extern int sacctmgr_add_account(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: Problem adding account associations\n"); + exit_code=1; + fprintf(stderr, + " error: Problem adding account associations\n"); rc = SLURM_ERROR; } end_it: + if(add_qos_list) + list_destroy(add_qos_list); list_destroy(acct_list); list_destroy(assoc_list); @@ -580,6 +702,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) acct_account_rec_t *acct = NULL; acct_association_rec_t *assoc = NULL; char *object; + List qos_list = NULL; print_field_t *field = NULL; @@ -599,7 +722,6 @@ extern int sacctmgr_list_account(int argc, char *argv[]) PRINT_MAXW, PRINT_ORG, PRINT_QOS, - PRINT_QOS_GOLD, PRINT_QOS_RAW, PRINT_PID, PRINT_PNAME, @@ -607,37 +729,25 @@ extern int sacctmgr_list_account(int argc, char *argv[]) PRINT_USER }; - acct_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->description_list = list_create(slurm_destroy_char); - acct_cond->organization_list = list_create(slurm_destroy_char); acct_cond->with_assocs = with_assoc_flag; - acct_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - acct_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, acct_cond, format_list); - if(!list_count(format_list)) { - addto_char_list(format_list, "A,D,O,Q"); + 
if(exit_code) { + destroy_acct_account_cond(acct_cond); + list_destroy(format_list); + return SLURM_ERROR; + } else if(!list_count(format_list)) { + slurm_addto_char_list(format_list, "A,D,O,Q"); if(acct_cond->with_assocs) - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Cl,ParentN,U,F,MaxC,MaxJ,MaxN,MaxW"); if(acct_cond->with_coords) - addto_char_list(format_list, "Coord"); + slurm_addto_char_list(format_list, "Coord"); } - acct_list = acct_storage_g_get_accounts(db_conn, acct_cond); - destroy_acct_account_cond(acct_cond); - if(!acct_list) { - printf(" Problem with query.\n"); - list_destroy(format_list); - return SLURM_ERROR; - } print_fields_list = list_create(destroy_print_field); itr = list_iterator_create(format_list); @@ -698,21 +808,16 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field->name = xstrdup("Org"); field->len = 20; field->print_routine = print_fields_str; - } else if(!strncasecmp("QOSGOLD", object, 4)) { - field->type = PRINT_QOS_GOLD; - field->name = xstrdup("QOS_GOLD"); - field->len = 7; - field->print_routine = print_fields_uint; } else if(!strncasecmp("QOSRAW", object, 4)) { field->type = PRINT_QOS_RAW; field->name = xstrdup("QOS_RAW"); - field->len = 7; - field->print_routine = print_fields_uint; + field->len = 10; + field->print_routine = print_fields_char_list; } else if(!strncasecmp("QOS", object, 1)) { field->type = PRINT_QOS; field->name = xstrdup("QOS"); - field->len = 9; - field->print_routine = print_fields_str; + field->len = 20; + field->print_routine = sacctmgr_print_qos_list; } else if(!strncasecmp("ParentID", object, 7)) { field->type = PRINT_PID; field->name = xstrdup("Par ID"); @@ -729,7 +834,8 @@ extern int sacctmgr_list_account(int argc, char *argv[]) field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -738,6 +844,22 @@ extern int sacctmgr_list_account(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_account_cond(acct_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + acct_list = acct_storage_g_get_accounts(db_conn, acct_cond); + destroy_acct_account_cond(acct_cond); + + if(!acct_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + itr = list_iterator_create(acct_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -752,107 +874,97 @@ extern int sacctmgr_list_account(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, acct->name); break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->cluster); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, acct->coordinators); break; case PRINT_DESC: field->print_routine( - SLURM_PRINT_VALUE, field, acct->description); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->fairshare); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->id); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_cpu_secs_per_job); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->max_jobs); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_nodes_per_job); break; case 
PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_wall_duration_per_job); break; case PRINT_ORG: field->print_routine( - SLURM_PRINT_VALUE, field, acct->organization); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str( - acct->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, - field, - acct->qos-1); + qos_list, + acct->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct->qos); + qos_list, + acct->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_id); break; case PRINT_PNAME: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_acct); break; case PRINT_PART: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->partition); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->user); break; default: @@ -868,93 +980,85 @@ extern int sacctmgr_list_account(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, acct->name); break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, acct->coordinators); break; case PRINT_DESC: field->print_routine( - SLURM_PRINT_VALUE, field, acct->description); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_ORG: field->print_routine( - SLURM_PRINT_VALUE, field, acct->organization); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, - field, acct_qos_str(acct->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, field, - acct->qos-1); + field, qos_list, + acct->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct->qos); + field, qos_list, + acct->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_PNAME: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_PART: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, NULL); break; default: @@ -986,19 +1090,6 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) int cond_set = 0, rec_set = 0, set = 0; List ret_list = NULL; - acct_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->description_list = list_create(slurm_destroy_char); - acct_cond->organization_list = list_create(slurm_destroy_char); - - acct_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - acct_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); 
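/*
 * Editorial sketch -- a hypothetical helper, not part of this patch.
 * It condenses the on-demand pattern the reworked _set_cond() routines
 * use in place of the eager list_create() calls removed in this hunk:
 * a condition list is created only once its keyword actually appears,
 * so untouched fields stay NULL and can be ignored by the storage
 * plugin.
 */
static int _add_cond_names(List *cond_list, char *names)
{
	/* create the list the first time the keyword is seen */
	if (!*cond_list)
		*cond_list = list_create(slurm_destroy_char);
	/* split the comma-separated value into the list and report
	 * how many new entries were actually added */
	return slurm_addto_char_list(*cond_list, names);
}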
- acct_cond->assoc_cond->fairshare = NO_VAL; - acct_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - acct_cond->assoc_cond->max_jobs = NO_VAL; - acct_cond->assoc_cond->max_nodes_per_job = NO_VAL; - acct_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; - assoc->fairshare = NO_VAL; assoc->max_cpu_secs_per_job = NO_VAL; assoc->max_jobs = NO_VAL; @@ -1006,10 +1097,10 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) assoc->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i++; cond_set = _set_cond(&i, argc, argv, acct_cond, NULL); - } else if (strncasecmp (argv[i], "Set", 3) == 0) { + } else if (!strncasecmp (argv[i], "Set", 3)) { i++; rec_set = _set_rec(&i, argc, argv, acct, assoc); } else { @@ -1018,7 +1109,8 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) } if(!rec_set) { - printf(" You didn't give me anything to set\n"); + exit_code=1; + fprintf(stderr, " You didn't give me anything to set\n"); destroy_acct_account_cond(acct_cond); destroy_acct_account_rec(acct); destroy_acct_association_rec(assoc); @@ -1050,11 +1142,13 @@ extern int sacctmgr_modify_account(int argc, char *argv[]) printf(" %s\n", object); } list_iterator_destroy(itr); + set = 1; } else if(ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1079,7 +1173,8 @@ assoc_start: } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1110,25 +1205,40 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) xmalloc(sizeof(acct_account_cond_t)); int i=0; List ret_list = NULL; + ListIterator itr = NULL; int set = 0; - acct_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->description_list = list_create(slurm_destroy_char); - acct_cond->organization_list = list_create(slurm_destroy_char); - - acct_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - acct_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - acct_cond->assoc_cond->partition_list = - list_create(slurm_destroy_char); - if(!(set = _set_cond(&i, argc, argv, acct_cond, NULL))) { - printf(" No conditions given to remove, not executing.\n"); + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); destroy_acct_account_cond(acct_cond); return SLURM_ERROR; } + /* check to see if person is trying to remove root account. This is + * bad, and should not be allowed outside of deleting a cluster. 
+ */ + if(acct_cond->assoc_cond + && acct_cond->assoc_cond->acct_list + && list_count(acct_cond->assoc_cond->acct_list)) { + char *tmp_char = NULL; + itr = list_iterator_create(acct_cond->assoc_cond->acct_list); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, "root")) + break; + } + list_iterator_destroy(itr); + if(tmp_char) { + exit_code=1; + fprintf(stderr, " You are not allowed to remove " + "the root account.\n" + " Use remove cluster instead.\n"); + destroy_acct_account_cond(acct_cond); + return SLURM_ERROR; + } + } + notice_thread_init(); if(set == 1) { ret_list = acct_storage_g_remove_accounts( @@ -1161,7 +1271,8 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing deleted\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } diff --git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c index 9a754fdefdff2d679e196ec4b4abf0899b64b6b7..461a19fdb3164da20555c9a13c16598bde95b2b9 100644 --- a/src/sacctmgr/association_functions.c +++ b/src/sacctmgr/association_functions.c @@ -123,44 +123,62 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (!end && strncasecmp (argv[i], "Tree", 4) == 0) { + if (!end && !strncasecmp (argv[i], "Tree", 4)) { tree_display = 1; + } else if (!end && !strncasecmp (argv[i], "WithDeleted", 5)) { + association_cond->with_deleted = 1; + } else if (!end && !strncasecmp (argv[i], "WOPInfo", 4)) { + association_cond->without_parent_info = 1; + } else if (!end && !strncasecmp (argv[i], "WOPLimits", 4)) { + association_cond->without_parent_limits = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(association_cond->id_list, argv[i]); + } else if(!end || !strncasecmp (argv[i], "Id", 1) + || !strncasecmp (argv[i], "Associations", 2)) { + if(!association_cond->id_list) + association_cond->id_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->id_list, + argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Id", 1) == 0) { - addto_char_list(association_cond->id_list, argv[i]+end); - set = 1; - } else if (strncasecmp (argv[i], "Associations", 2) == 0) { - addto_char_list(association_cond->id_list, argv[i]+end); - set = 1; - } else if (strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(association_cond->user_list, + } else if (!strncasecmp (argv[i], "Users", 1)) { + if(!association_cond->user_list) + association_cond->user_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->user_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(association_cond->acct_list, + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + if(!association_cond->acct_list) + association_cond->acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->acct_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(association_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + if(!association_cond->cluster_list) + association_cond->cluster_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->cluster_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - 
addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Partitions", 4) == 0) { - addto_char_list(association_cond->partition_list, + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Partitions", 4)) { + if(!association_cond->partition_list) + association_cond->partition_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(association_cond->partition_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Parent", 4) == 0) { + } else if (!strncasecmp (argv[i], "Parent", 4)) { association_cond->parent_acct = strip_quotes(argv[i]+end, NULL); set = 1; } else { - printf(" Unknown condition: %s\n", argv[i]); + exit_code = 1; + fprintf(stderr, " Unknown condition: %s\n", argv[i]); } } (*start) = i; @@ -332,6 +350,7 @@ extern int sacctmgr_list_association(int argc, char *argv[]) PRINT_CLUSTER, PRINT_FAIRSHARE, PRINT_ID, + PRINT_LFT, PRINT_MAXC, PRINT_MAXJ, PRINT_MAXN, @@ -339,31 +358,22 @@ extern int sacctmgr_list_association(int argc, char *argv[]) PRINT_PID, PRINT_PNAME, PRINT_PART, + PRINT_RGT, PRINT_USER }; - assoc_cond->id_list = list_create(slurm_destroy_char); - assoc_cond->user_list = list_create(slurm_destroy_char); - assoc_cond->acct_list = list_create(slurm_destroy_char); - assoc_cond->cluster_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, assoc_cond, format_list); - assoc_list = acct_storage_g_get_associations(db_conn, assoc_cond); - destroy_acct_association_cond(assoc_cond); - - if(!assoc_list) { - printf(" Problem with query.\n"); + if(exit_code) { + destroy_acct_association_cond(assoc_cond); list_destroy(format_list); return SLURM_ERROR; - } + } else if(!list_count(format_list)) + slurm_addto_char_list(format_list, + "C,A,U,F,MaxC,MaxJ,MaxN,MaxW"); + print_fields_list = list_create(destroy_print_field); - first_list = assoc_list; - assoc_list = _sort_assoc_list(first_list); - if(!list_count(format_list)) - addto_char_list(format_list, "C,A,U,F,MaxC,MaxJ,MaxN,MaxW"); - itr = list_iterator_create(format_list); while((object = list_next(itr))) { field = xmalloc(sizeof(print_field_t)); @@ -390,6 +400,11 @@ extern int sacctmgr_list_association(int argc, char *argv[]) field->name = xstrdup("ID"); field->len = 6; field->print_routine = print_fields_uint; + } else if(!strncasecmp("LFT", object, 1)) { + field->type = PRINT_LFT; + field->name = xstrdup("LFT"); + field->len = 6; + field->print_routine = print_fields_uint; } else if(!strncasecmp("MaxCPUSecs", object, 4)) { field->type = PRINT_MAXC; field->name = xstrdup("MaxCPUSecs"); @@ -425,13 +440,20 @@ extern int sacctmgr_list_association(int argc, char *argv[]) field->name = xstrdup("Partition"); field->len = 10; field->print_routine = print_fields_str; + } else if(!strncasecmp("RGT", object, 1)) { + field->type = PRINT_RGT; + field->name = xstrdup("RGT"); + field->len = 6; + field->print_routine = print_fields_uint; } else if(!strncasecmp("User", object, 1)) { field->type = PRINT_USER; field->name = xstrdup("User"); field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); + exit(1); xfree(field); continue; } @@ -440,6 +462,24 @@ extern int sacctmgr_list_association(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_association_cond(assoc_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + assoc_list = acct_storage_g_get_associations(db_conn, 
assoc_cond); + destroy_acct_association_cond(assoc_cond); + + if(!assoc_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + first_list = assoc_list; + assoc_list = _sort_assoc_list(first_list); + itr = list_iterator_create(assoc_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -477,53 +517,61 @@ extern int sacctmgr_list_association(int argc, char *argv[]) } else { print_acct = assoc->acct; } - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, print_acct); break; case PRINT_CLUSTER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->cluster); break; case PRINT_FAIRSHARE: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->fairshare); break; case PRINT_ID: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->id); break; + case PRINT_LFT: + field->print_routine(field, + assoc->lft); + break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_cpu_secs_per_job); break; case PRINT_MAXJ: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_jobs); break; case PRINT_MAXN: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_wall_duration_per_job); break; case PRINT_PID: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->parent_id); break; case PRINT_PNAME: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->parent_acct); break; case PRINT_PART: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->partition); break; + case PRINT_RGT: + field->print_routine(field, + assoc->rgt); + break; case PRINT_USER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->user); break; default: diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index 2c0ac1653ba115fd781da37c8e798336c1e274c1..3a39ddb3211c33b77298c6968c7f36b4140c7f54 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -49,24 +49,25 @@ static int _set_cond(int *start, int argc, char *argv[], for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(cluster_list, argv[i]); - set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if(!end || !strncasecmp (argv[i], "Names", 1)) { + if(cluster_list) { + if(slurm_addto_char_list(cluster_list, + argv[i]+end)) + set = 1; + } + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(cluster_list, - argv[i]+end); - set = 1; + slurm_addto_char_list(format_list, argv[i]+end); } else { - printf(" Unknown condition: %s\n" - "Use keyword set to modify value\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" + "Use keyword set to modify value\n", argv[i]); + break; } } (*start) = i; @@ -83,46 +84,51 @@ static int _set_rec(int *start, int argc, char *argv[], for (i=(*start); 
i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i--; break; } else if(!end && !strncasecmp(argv[i], "set", 3)) { continue; } else if(!end) { - printf(" Bad format on %s: End your option with " + exit_code=1; + fprintf(stderr, + " Bad format on %s: End your option with " "an '=' sign\n", argv[i]); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &assoc->fairshare, "FairShare") == SLURM_SUCCESS) set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &assoc->max_jobs, "MaxJobs") == SLURM_SUCCESS) set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &assoc->max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { assoc->max_wall_duration_per_job = (uint32_t) mins; set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { if (get_uint(argv[i]+end, &assoc->max_cpu_secs_per_job, "MaxCPUSecs") == SLURM_SUCCESS) set = 1; } else { - printf(" Unknown option: %s\n" - " Use keyword 'where' to modify condition\n", - argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n" + " Use keyword 'where' to modify condition\n", + argv[i]); } } (*start) = i; @@ -151,38 +157,45 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); if(!end) { - addto_char_list(name_list, argv[i]+end); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "FairShare", 1)) { fairshare = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { max_cpu_secs_per_job = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs=", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs=", 4)) { max_jobs = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { max_nodes_per_job = atoi(argv[i]+end); limit_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { max_wall_duration_per_job = (uint32_t) mins; limit_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(name_list, argv[i]+end); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } - if(!list_count(name_list)) { + if(exit_code) { list_destroy(name_list); - printf(" Need name of cluster to add.\n"); + return SLURM_ERROR; + } else if(!list_count(name_list)) 
{ + list_destroy(name_list); + exit_code=1; + fprintf(stderr, " Need name of cluster to add.\n"); return SLURM_ERROR; } else { List temp_list = NULL; @@ -194,8 +207,10 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) temp_list = acct_storage_g_get_clusters(db_conn, &cluster_cond); if(!temp_list) { - printf(" Problem getting clusters from database. " - "Contact your admin.\n"); + exit_code=1; + fprintf(stderr, + " Problem getting clusters from database. " + "Contact your admin.\n"); return SLURM_ERROR; } @@ -293,7 +308,8 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: problem adding clusters\n"); + exit_code=1; + fprintf(stderr, " Problem adding clusters\n"); } end_it: list_destroy(cluster_list); @@ -332,12 +348,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) cluster_cond->cluster_list = list_create(slurm_destroy_char); _set_cond(&i, argc, argv, cluster_cond->cluster_list, format_list); - - cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); - destroy_acct_cluster_cond(cluster_cond); - - if(!cluster_list) { - printf(" Problem with query.\n"); + if(exit_code) { + destroy_acct_cluster_cond(cluster_cond); list_destroy(format_list); return SLURM_ERROR; } @@ -345,8 +357,9 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); if(!list_count(format_list)) { - addto_char_list(format_list, - "Cl,Controlh,Controlp,F,MaxC,MaxJ,MaxN,MaxW"); + slurm_addto_char_list(format_list, + "Cl,Controlh,Controlp,F,MaxC," + "MaxJ,MaxN,MaxW"); } itr = list_iterator_create(format_list); @@ -393,7 +406,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) field->len = 11; field->print_routine = print_fields_time; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -402,6 +416,22 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_cluster_cond(cluster_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); + destroy_acct_cluster_cond(cluster_cond); + + if(!cluster_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + itr = list_iterator_create(cluster_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -410,40 +440,40 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) while((field = list_next(itr2))) { switch(field->type) { case PRINT_CLUSTER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, cluster->name); break; case PRINT_CHOST: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, cluster->control_host); break; case PRINT_CPORT: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, cluster->control_port); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_fairshare); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_max_cpu_secs_per_job); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster->default_max_jobs); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, 
cluster->default_max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, cluster-> default_max_wall_duration_per_job); break; @@ -489,12 +519,12 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) assoc->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i++; if(_set_cond(&i, argc, argv, assoc_cond->cluster_list, NULL)) cond_set = 1; - } else if (strncasecmp (argv[i], "Set", 3) == 0) { + } else if (!strncasecmp (argv[i], "Set", 3)) { i++; if(_set_rec(&i, argc, argv, assoc)) rec_set = 1; @@ -506,7 +536,8 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } if(!rec_set) { - printf(" You didn't give me anything to set\n"); + exit_code=1; + fprintf(stderr, " You didn't give me anything to set\n"); destroy_acct_association_rec(assoc); destroy_acct_association_cond(assoc_cond); return SLURM_ERROR; @@ -518,6 +549,10 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) destroy_acct_association_cond(assoc_cond); return SLURM_SUCCESS; } + } else if(exit_code) { + destroy_acct_association_rec(assoc); + destroy_acct_association_cond(assoc_cond); + return SLURM_ERROR; } printf(" Setting\n"); @@ -573,7 +608,8 @@ extern int sacctmgr_modify_cluster(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -606,7 +642,9 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) cluster_cond->cluster_list = list_create(slurm_destroy_char); if(!_set_cond(&i, argc, argv, cluster_cond->cluster_list, NULL)) { - printf(" No conditions given to remove, not executing.\n"); + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); destroy_acct_cluster_cond(cluster_cond); return SLURM_ERROR; } @@ -639,7 +677,8 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing deleted\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -666,32 +705,40 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) int end = parse_option_end(argv[i]); if(!end) { if(cluster_name) { - printf(" Can only do one cluster at a time. " + exit_code=1; + fprintf(stderr, + " Can only do one cluster at a time. " "Already doing %s\n", cluster_name); continue; } cluster_name = xstrdup(argv[i]+end); - } else if (strncasecmp (argv[i], "File", 1) == 0) { + } else if (!strncasecmp (argv[i], "File", 1)) { if(file_name) { - printf(" File name already set to %s\n", - file_name); + exit_code=1; + fprintf(stderr, + " File name already set to %s\n", + file_name); continue; } file_name = xstrdup(argv[i]+end); - } else if (strncasecmp (argv[i], "Name", 1) == 0) { + } else if (!strncasecmp (argv[i], "Name", 1)) { if(cluster_name) { - printf(" Can only do one cluster at a time. " - "Already doing %s\n", cluster_name); + exit_code=1; + fprintf(stderr, + " Can only do one cluster at a time. 
" + "Already doing %s\n", cluster_name); continue; } cluster_name = xstrdup(argv[i]+end); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } if(!cluster_name) { - printf(" We need a cluster to dump.\n"); + exit_code=1; + fprintf(stderr, " We need a cluster to dump.\n"); return SLURM_ERROR; } @@ -709,11 +756,13 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) list_destroy(assoc_cond.cluster_list); if(!assoc_list) { - printf(" Problem with query.\n"); + exit_code=1; + fprintf(stderr, " Problem with query.\n"); xfree(cluster_name); return SLURM_ERROR; } else if(!list_count(assoc_list)) { - printf(" Cluster %s returned nothing.", cluster_name); + exit_code=1; + fprintf(stderr, " Cluster %s returned nothing.", cluster_name); xfree(cluster_name); return SLURM_ERROR; } @@ -753,12 +802,14 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) "# User - lipari:MaxNodesPerJob=2:MaxJobs=3:" "MaxProcSecondsPerJob=4:FairShare=1:" "MaxWallDurationPerJob=1\n") < 0) { - error("Can't write to file"); + exit_code=1; + fprintf(stderr, "Can't write to file"); return SLURM_ERROR; } if(fprintf(fd, "Cluster - %s\n", cluster_name) < 0) { - error("Can't write to file"); + exit_code=1; + fprintf(stderr, "Can't write to file"); return SLURM_ERROR; } diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c index 68f2960c115f52006f8f68145d9912797fa2ed34..d043fcdf95a6ccb7b492fda16daaaba06032a61b 100644 --- a/src/sacctmgr/common.c +++ b/src/sacctmgr/common.c @@ -92,14 +92,16 @@ extern void destroy_sacctmgr_assoc(void *object) extern int parse_option_end(char *option) { int end = 0; - + if(!option) return 0; while(option[end] && option[end] != '=') end++; + if(!option[end]) return 0; + end++; return end; } @@ -143,57 +145,6 @@ extern char *strip_quotes(char *option, int *increased) return meat; } -extern void addto_char_list(List char_list, char *names) -{ - int i=0, start=0; - char *name = NULL, *tmp_char = NULL; - ListIterator itr = list_iterator_create(char_list); - - if(names && char_list) { - if (names[i] == '\"' || names[i] == '\'') - i++; - start = i; - while(names[i]) { - if(names[i] == '\"' || names[i] == '\'') - break; - else if(names[i] == ',') { - if((i-start) > 0) { - name = xmalloc((i-start+1)); - memcpy(name, names+start, (i-start)); - - while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) - break; - } - - if(!tmp_char) - list_append(char_list, name); - else - xfree(name); - list_iterator_reset(itr); - } - i++; - start = i; - } - i++; - } - if((i-start) > 0) { - name = xmalloc((i-start)+1); - memcpy(name, names+start, (i-start)); - while((tmp_char = list_next(itr))) { - if(!strcasecmp(tmp_char, name)) - break; - } - - if(!tmp_char) - list_append(char_list, name); - else - xfree(name); - } - } - list_iterator_destroy(itr); -} - extern int notice_thread_init() { pthread_attr_t attr; @@ -353,18 +304,21 @@ extern acct_user_rec_t *sacctmgr_find_user(char *name) { acct_user_rec_t *user = NULL; acct_user_cond_t user_cond; + acct_association_cond_t assoc_cond; List user_list = NULL; if(!name) return NULL; memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.user_list = list_create(NULL); - list_append(user_cond.user_list, name); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.user_list = list_create(NULL); + list_append(assoc_cond.user_list, name); + user_cond.assoc_cond = &assoc_cond; user_list = acct_storage_g_get_users(db_conn, &user_cond); - 
list_destroy(user_cond.user_list); + list_destroy(assoc_cond.user_list); if(user_list) user = list_pop(user_list); @@ -378,18 +332,21 @@ extern acct_account_rec_t *sacctmgr_find_account(char *name) { acct_account_rec_t *account = NULL; acct_account_cond_t account_cond; + acct_association_cond_t assoc_cond; List account_list = NULL; if(!name) return NULL; memset(&account_cond, 0, sizeof(acct_account_cond_t)); - account_cond.acct_list = list_create(NULL); - list_append(account_cond.acct_list, name); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); + assoc_cond.acct_list = list_create(NULL); + list_append(assoc_cond.acct_list, name); + account_cond.assoc_cond = &assoc_cond; account_list = acct_storage_g_get_accounts(db_conn, &account_cond); - list_destroy(account_cond.acct_list); + list_destroy(assoc_cond.acct_list); if(account_list) account = list_pop(account_list); @@ -480,6 +437,27 @@ extern acct_association_rec_t *sacctmgr_find_account_base_assoc_from_list( return assoc; } + +extern acct_qos_rec_t *sacctmgr_find_qos_from_list( + List qos_list, char *name) +{ + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + + if(!name || !qos_list) + return NULL; + + itr = list_iterator_create(qos_list); + while((qos = list_next(itr))) { + if(!strcasecmp(name, qos->name)) + break; + } + list_iterator_destroy(itr); + + return qos; + +} + extern acct_user_rec_t *sacctmgr_find_user_from_list( List user_list, char *name) { @@ -562,61 +540,198 @@ extern int get_uint(char *in_value, uint32_t *out_value, char *type) return SLURM_SUCCESS; } -extern void sacctmgr_print_coord_list(type_t type, print_field_t *field, - List value) +extern int addto_qos_char_list(List char_list, List qos_list, char *names, + int option) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + uint32_t id=0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + if(!qos_list || !list_count(qos_list)) { + debug2("No real qos_list"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + + id = str_2_acct_qos(qos_list, name); + xfree(name); + if(id == NO_VAL) + goto bad; + + if(option) { + name = xstrdup_printf( + "%c%u", option, id); + } else + name = xstrdup_printf("%u", id); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + list_iterator_reset(itr); + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + bad: + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. 
It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + id = str_2_acct_qos(qos_list, name); + xfree(name); + if(id == NO_VAL) + goto end_it; + + if(option) { + name = xstrdup_printf( + "%c%u", option, id); + } else + name = xstrdup_printf("%u", id); + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } +end_it: + list_iterator_destroy(itr); + return count; +} + +extern void sacctmgr_print_coord_list(print_field_t *field, List value) { ListIterator itr = NULL; char *print_this = NULL; acct_coord_rec_t *object = NULL; - switch(type) { - case SLURM_PRINT_HEADLINE: + if(!value || !list_count(value)) { if(print_fields_parsable_print) - printf("%s|", field->name); + print_this = xstrdup(""); else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - if(!value || !list_count(value)) { - if(print_fields_parsable_print) - print_this = xstrdup(""); - else - print_this = xstrdup(" "); - } else { - list_sort(value, (ListCmpF)sort_coord_list); - itr = list_iterator_create(value); - while((object = list_next(itr))) { - if(print_this) - xstrfmtcat(print_this, ",%s", - object->name); - else - print_this = xstrdup(object->name); - } - list_iterator_destroy(itr); + print_this = xstrdup(" "); + } else { + list_sort(value, (ListCmpF)sort_coord_list); + itr = list_iterator_create(value); + while((object = list_next(itr))) { + if(print_this) + xstrfmtcat(print_this, ",%s", + object->name); + else + print_this = xstrdup(object->name); } + list_iterator_destroy(itr); + } + + if(print_fields_parsable_print) + printf("%s|", print_this); + else { + if(strlen(print_this) > field->len) + print_this[field->len-1] = '+'; + + printf("%-*.*s ", field->len, field->len, print_this); + } + xfree(print_this); +} - if(print_fields_parsable_print) - printf("%s|", print_this); - else { - if(strlen(print_this) > field->len) - print_this[field->len-1] = '+'; - - printf("%-*.*s ", field->len, field->len, print_this); - } - xfree(print_this); - break; - default: - if(print_fields_parsable_print) - printf("%s|", "n/a"); - else - printf("%-*s ", field->len, "n/a"); - break; +extern void sacctmgr_print_qos_list(print_field_t *field, List qos_list, + List value) +{ + char *print_this = NULL; + + print_this = get_qos_complete_str(qos_list, value); + + if(print_fields_parsable_print) + printf("%s|", print_this); + else { + if(strlen(print_this) > field->len) + print_this[field->len-1] = '+'; + + printf("%-*.*s ", field->len, field->len, print_this); + } + xfree(print_this); +} + +extern char *get_qos_complete_str(List qos_list, List num_qos_list) +{ + List temp_list = NULL; + char *temp_char = NULL; + char *print_this = NULL; + ListIterator itr = NULL; + + if(!qos_list || !list_count(qos_list) + || !num_qos_list || !list_count(num_qos_list)) + return xstrdup("normal"); + + temp_list = list_create(NULL); + + itr = list_iterator_create(num_qos_list); + while((temp_char = list_next(itr))) { + temp_char = acct_qos_str(qos_list, atoi(temp_char)); + if(temp_char) + list_append(temp_list, temp_char); } + list_iterator_destroy(itr); + list_sort(temp_list, (ListCmpF)sort_char_list); + itr = 
list_iterator_create(temp_list); + while((temp_char = list_next(itr))) { + if(print_this) + xstrfmtcat(print_this, ",%s", temp_char); + else + print_this = xstrdup(temp_char); + } + list_iterator_destroy(itr); + list_destroy(temp_list); + + if(!print_this) + return xstrdup("normal"); + + return print_this; } extern int sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b) @@ -630,3 +745,16 @@ extern int sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b) return 0; } + +extern int sort_char_list(char *name_a, char *name_b) +{ + int diff = strcmp(name_a, name_b); + + if (diff < 0) + return -1; + else if (diff > 0) + return 1; + + return 0; +} + diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index 2f949f63f89a77de10b522c775be92cfc3e9ed38..4e3b82edf52910e86ed700fd2256d39c9cf3e7e2 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * cluster_functions.c - functions dealing with clusters in the + * file_functions.c - functions dealing with files that are generated in the * accounting system. ***************************************************************************** * Copyright (C) 2008 Lawrence Livermore National Security. @@ -52,7 +52,7 @@ typedef struct { char *name; char *org; char *part; - acct_qos_level_t qos; + List qos_list; } sacctmgr_file_opts_t; enum { @@ -71,7 +71,6 @@ enum { PRINT_NAME, PRINT_ORG, PRINT_QOS, - PRINT_QOS_GOLD, PRINT_QOS_RAW, PRINT_PID, PRINT_PARENT, @@ -85,6 +84,8 @@ typedef enum { MOD_USER } sacctmgr_mod_type_t; +static List qos_list = NULL; + static int _strip_continuation(char *buf, int len) { char *ptr; @@ -204,13 +205,12 @@ static sacctmgr_file_opts_t *_parse_options(char *options) sacctmgr_file_opts_t *file_opts = xmalloc(sizeof(sacctmgr_file_opts_t)); char *option = NULL; char quote_c = '\0'; - + file_opts->fairshare = 1; file_opts->max_cpu_secs_per_job = INFINITE; file_opts->max_jobs = INFINITE; file_opts->max_nodes_per_job = INFINITE; file_opts->max_wall_duration_per_job = INFINITE; - file_opts->qos = ACCT_QOS_NORMAL; file_opts->admin = ACCT_ADMIN_NONE; while(options[i]) { @@ -247,73 +247,101 @@ static sacctmgr_file_opts_t *_parse_options(char *options) option = strip_quotes(sub+end, NULL); if(!end) { if(file_opts->name) { - printf(" Bad format on %s: " + exit_code=1; + fprintf(stderr, " Bad format on %s: " "End your option with " "an '=' sign\n", sub); _destroy_sacctmgr_file_opts(file_opts); break; } file_opts->name = xstrdup(option); - } else if (strncasecmp (sub, "AdminLevel", 2) == 0) { + } else if (!strncasecmp (sub, "AdminLevel", 2)) { file_opts->admin = str_2_acct_admin_level(option); - } else if (strncasecmp (sub, "Coordinator", 2) == 0) { + } else if (!strncasecmp (sub, "Coordinator", 2)) { if(!file_opts->coord_list) file_opts->coord_list = list_create(slurm_destroy_char); - addto_char_list(file_opts->coord_list, option); - } else if (strncasecmp (sub, "DefaultAccount", 3) == 0) { + slurm_addto_char_list(file_opts->coord_list, option); + } else if (!strncasecmp (sub, "DefaultAccount", 3)) { file_opts->def_acct = xstrdup(option); - } else if (strncasecmp (sub, "Description", 3) == 0) { + } else if (!strncasecmp (sub, "Description", 3)) { file_opts->desc = xstrdup(option); - } else if (strncasecmp (sub, "FairShare", 1) == 0) { + } else if (!strncasecmp (sub, "FairShare", 1)) { if (get_uint(option, &file_opts->fairshare, "FairShare") != SLURM_SUCCESS) { - printf(" 
Bad FairShare value: %s\n", option);
+				exit_code=1;
+				fprintf(stderr,
+					" Bad FairShare value: %s\n", option);
 				_destroy_sacctmgr_file_opts(file_opts);
 				break;
 			}
-		} else if (strncasecmp (sub, "MaxCPUSec", 4) == 0
-			   || strncasecmp (sub, "MaxProcSec", 4) == 0) {
+		} else if (!strncasecmp (sub, "MaxCPUSec", 4)
+			   || !strncasecmp (sub, "MaxProcSec", 4)) {
 			if (get_uint(option, &file_opts->max_cpu_secs_per_job,
 			    "MaxCPUSec") != SLURM_SUCCESS) {
-				printf(" Bad MaxCPUSec value: %s\n", option);
+				exit_code=1;
+				fprintf(stderr,
+					" Bad MaxCPUSec value: %s\n", option);
 				_destroy_sacctmgr_file_opts(file_opts);
 				break;
 			}
-		} else if (strncasecmp (sub, "MaxJobs", 4) == 0) {
+		} else if (!strncasecmp (sub, "MaxJobs", 4)) {
 			if (get_uint(option, &file_opts->max_jobs,
 			    "MaxJobs") != SLURM_SUCCESS) {
-				printf(" Bad MaxJobs value: %s\n", option);
+				exit_code=1;
+				fprintf(stderr,
+					" Bad MaxJobs value: %s\n", option);
 				_destroy_sacctmgr_file_opts(file_opts);
 				break;
 			}
-		} else if (strncasecmp (sub, "MaxNodes", 4) == 0) {
+		} else if (!strncasecmp (sub, "MaxNodes", 4)) {
 			if (get_uint(option, &file_opts->max_nodes_per_job,
 			    "MaxNodes") != SLURM_SUCCESS) {
-				printf(" Bad MaxNodes value: %s\n", option);
+				exit_code=1;
+				fprintf(stderr,
+					" Bad MaxNodes value: %s\n", option);
 				_destroy_sacctmgr_file_opts(file_opts);
 				break;
 			}
-		} else if (strncasecmp (sub, "MaxWall", 4) == 0) {
+		} else if (!strncasecmp (sub, "MaxWall", 4)) {
 			mins = time_str2mins(option);
 			if (mins >= 0) {
 				file_opts->max_wall_duration_per_job =
 					(uint32_t) mins;
-			} else if (strcmp(option, "-1") == 0) {
+			} else if (!strcmp(option, "-1")) {
 				file_opts->max_wall_duration_per_job =
 					INFINITE;
 			} else {
-				printf(" Bad MaxWall time format: %s\n",
+				exit_code=1;
+				fprintf(stderr,
+					" Bad MaxWall time format: %s\n",
 					option);
 				_destroy_sacctmgr_file_opts(file_opts);
 				break;
 			}
-		} else if (strncasecmp (sub, "Organization", 1) == 0) {
+		} else if (!strncasecmp (sub, "Organization", 1)) {
 			file_opts->org = xstrdup(option);
-		} else if (strncasecmp (sub, "QosLevel", 1) == 0
-			   || strncasecmp (sub, "Expedite", 1) == 0) {
-			file_opts->qos = str_2_acct_qos(option);
+		} else if (!strncasecmp (sub, "QosLevel", 1)
+			   || !strncasecmp (sub, "Expedite", 1)) {
+			int option2 = 0;
+			if(!file_opts->qos_list) {
+				file_opts->qos_list =
+					list_create(slurm_destroy_char);
+			}
+
+			if(!qos_list) {
+				qos_list = acct_storage_g_get_qos(
+					db_conn, NULL);
+			}
+			if(end > 2 && sub[end-1] == '='
+			   && (sub[end-2] == '+'
+			       || sub[end-2] == '-'))
+				option2 = (int)sub[end-2];
+
+			addto_qos_char_list(file_opts->qos_list, qos_list,
+					    option, option2);
 		} else {
-			printf(" Unknown option: %s\n", sub);
+			exit_code=1;
+			fprintf(stderr, " Unknown option: %s\n", sub);
 		}
 		xfree(sub);
@@ -330,7 +358,8 @@ static sacctmgr_file_opts_t *_parse_options(char *options)
 	xfree(option);
 	if(!file_opts->name) {
-		printf(" error: No name given\n");
+		exit_code=1;
+		fprintf(stderr, " No name given\n");
 		_destroy_sacctmgr_file_opts(file_opts);
 	}
 	return file_opts;
 }
@@ -417,21 +446,16 @@ static List _set_up_print_fields(List format_list)
 			field->name = xstrdup("Org");
 			field->len = 20;
 			field->print_routine = print_fields_str;
-		} else if(!strncasecmp("QOSGOLD", object, 4)) {
-			field->type = PRINT_QOS_GOLD;
-			field->name = xstrdup("QOS_GOLD");
-			field->len = 7;
-			field->print_routine = print_fields_uint;
 		} else if(!strncasecmp("QOSRAW", object, 4)) {
 			field->type = PRINT_QOS_RAW;
 			field->name = xstrdup("QOS_RAW");
 			field->len = 7;
-			field->print_routine = print_fields_uint;
+			field->print_routine = print_fields_char_list;
 		} else 
if(!strncasecmp("QOS", object, 1)) { field->type = PRINT_QOS; field->name = xstrdup("QOS"); field->len = 9; - field->print_routine = print_fields_str; + field->print_routine = sacctmgr_print_qos_list; } else if(!strncasecmp("Parent", object, 4)) { field->type = PRINT_PARENT; field->name = xstrdup("Parent"); @@ -448,7 +472,8 @@ static List _set_up_print_fields(List format_list) field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -472,10 +497,10 @@ static int _print_out_assoc(List assoc_list, bool user) format_list = list_create(slurm_destroy_char); if(user) - addto_char_list(format_list, + slurm_addto_char_list(format_list, "User,Account,F,MaxC,MaxJ,MaxN,MaxW"); else - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Account,Parent,F,MaxC,MaxJ,MaxN,MaxW"); print_fields_list = _set_up_print_fields(format_list); @@ -489,41 +514,41 @@ static int _print_out_assoc(List assoc_list, bool user) while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCOUNT: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->acct); break; case PRINT_FAIRSHARE: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->fairshare); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_cpu_secs_per_job); break; case PRINT_MAXJ: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_jobs); break; case PRINT_MAXN: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, assoc->max_wall_duration_per_job); break; case PRINT_PARENT: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->parent_acct); break; case PRINT_PART: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->partition); break; case PRINT_USER: - field->print_routine(SLURM_PRINT_VALUE, field, + field->print_routine(field, assoc->user); break; default: @@ -656,15 +681,16 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, char *desc = NULL, *org = NULL; acct_account_rec_t mod_acct; acct_account_cond_t acct_cond; + acct_association_cond_t assoc_cond; memset(&mod_acct, 0, sizeof(acct_account_rec_t)); memset(&acct_cond, 0, sizeof(acct_account_cond_t)); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); if(file_opts->desc) desc = xstrdup(file_opts->desc); - else - desc = xstrdup(file_opts->name); - if(strcmp(desc, acct->description)) { + + if(desc && strcmp(desc, acct->description)) { printf(" Changed description for account " "'%s' from '%s' to '%s'\n", acct->name, @@ -672,15 +698,13 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, desc); mod_acct.description = desc; changed = 1; - } + } else + xfree(desc); if(file_opts->org) org = xstrdup(file_opts->org); - else if(strcmp(parent, "root")) - org = xstrdup(parent); - else - org = xstrdup(file_opts->name); - if(strcmp(org, acct->organization)) { + + if(org && strcmp(org, acct->organization)) { printf(" Changed organization for account '%s' " "from '%s' to '%s'\n", acct->name, @@ -688,35 +712,74 @@ static int _mod_acct(sacctmgr_file_opts_t *file_opts, org); mod_acct.organization = org; changed = 1; - } - - if(acct->qos != file_opts->qos) { - printf(" Changed QOS for account '%s' " - 
"from '%s' to '%s'\n", - acct->name, - acct_qos_str(acct->qos), - acct_qos_str(file_opts->qos)); - mod_acct.qos = file_opts->qos; - changed = 1; + } else + xfree(org); + + if(acct->qos_list && list_count(acct->qos_list) + && file_opts->qos_list && list_count(file_opts->qos_list)) { + ListIterator now_qos_itr = list_iterator_create(acct->qos_list), + new_qos_itr = list_iterator_create(file_opts->qos_list); + char *now_qos = NULL, *new_qos = NULL; + + if(!mod_acct.qos_list) + mod_acct.qos_list = list_create(slurm_destroy_char); + while((new_qos = list_next(new_qos_itr))) { + while((now_qos = list_next(now_qos_itr))) { + if(!strcmp(new_qos, now_qos)) + break; + } + list_iterator_reset(now_qos_itr); + if(!now_qos) + list_append(mod_acct.qos_list, + xstrdup(new_qos)); + } + list_iterator_destroy(new_qos_itr); + list_iterator_destroy(now_qos_itr); + if(mod_acct.qos_list && list_count(mod_acct.qos_list)) + new_qos = get_qos_complete_str(qos_list, + mod_acct.qos_list); + if(new_qos) { + printf(" Adding QOS for account '%s' '%s'\n", + acct->name, + new_qos); + xfree(new_qos); + changed = 1; + } else { + list_destroy(mod_acct.qos_list); + mod_acct.qos_list = NULL; + } + } else if(file_opts->qos_list && list_count(file_opts->qos_list)) { + char *new_qos = get_qos_complete_str(qos_list, + file_opts->qos_list); + + if(new_qos) { + printf(" Adding QOS for account '%s' '%s'\n", + acct->name, + new_qos); + xfree(new_qos); + mod_acct.qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; + changed = 1; + } } if(changed) { List ret_list = NULL; - acct_cond.acct_list = - list_create(NULL); - - list_push(acct_cond.acct_list, - acct->name); - + assoc_cond.acct_list = list_create(NULL); + list_append(assoc_cond.acct_list, acct->name); + acct_cond.assoc_cond = &assoc_cond; + notice_thread_init(); ret_list = acct_storage_g_modify_accounts(db_conn, my_uid, &acct_cond, &mod_acct); notice_thread_fini(); - - list_destroy(acct_cond.acct_list); + list_destroy(assoc_cond.acct_list); + + if(mod_acct.qos_list) + list_destroy(mod_acct.qos_list); /* if(ret_list && list_count(ret_list)) { */ /* char *object = NULL; */ @@ -748,19 +811,20 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, acct_user_rec_t mod_user; acct_user_cond_t user_cond; List ret_list = NULL; + acct_association_cond_t assoc_cond; memset(&mod_user, 0, sizeof(acct_user_rec_t)); memset(&user_cond, 0, sizeof(acct_user_cond_t)); + memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); - user_cond.user_list = list_create(NULL); - list_push(user_cond.user_list, user->name); + assoc_cond.user_list = list_create(NULL); + list_append(assoc_cond.user_list, user->name); + user_cond.assoc_cond = &assoc_cond; if(file_opts->def_acct) def_acct = xstrdup(file_opts->def_acct); - else - def_acct = xstrdup(parent); - if(strcmp(def_acct, user->default_acct)) { + if(def_acct && strcmp(def_acct, user->default_acct)) { printf(" Changed User '%s' " "default account '%s' -> '%s'\n", user->name, @@ -768,16 +832,54 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, def_acct); mod_user.default_acct = def_acct; changed = 1; - } + } else + xfree(def_acct); - if(user->qos != file_opts->qos) { - printf(" Changed User '%s' " - "QOS '%s' -> '%s'\n", - user->name, - acct_qos_str(user->qos), - acct_qos_str(file_opts->qos)); - mod_user.qos = file_opts->qos; - changed = 1; + if(user->qos_list && list_count(user->qos_list) + && file_opts->qos_list && list_count(file_opts->qos_list)) { + ListIterator now_qos_itr = list_iterator_create(user->qos_list), + new_qos_itr = 
list_iterator_create(file_opts->qos_list); + char *now_qos = NULL, *new_qos = NULL; + + if(!mod_user.qos_list) + mod_user.qos_list = list_create(slurm_destroy_char); + while((new_qos = list_next(new_qos_itr))) { + while((now_qos = list_next(now_qos_itr))) { + if(!strcmp(new_qos, now_qos)) + break; + } + list_iterator_reset(now_qos_itr); + if(!now_qos) + list_append(mod_user.qos_list, + xstrdup(new_qos)); + } + list_iterator_destroy(new_qos_itr); + list_iterator_destroy(now_qos_itr); + if(mod_user.qos_list && list_count(mod_user.qos_list)) + new_qos = get_qos_complete_str(qos_list, + mod_user.qos_list); + if(new_qos) { + printf(" Adding QOS for user '%s' '%s'\n", + user->name, + new_qos); + xfree(new_qos); + changed = 1; + } else + list_destroy(mod_user.qos_list); + + } else if(file_opts->qos_list && list_count(file_opts->qos_list)) { + char *new_qos = get_qos_complete_str(qos_list, + file_opts->qos_list); + + if(new_qos) { + printf(" Adding QOS for user '%s' '%s'\n", + user->name, + new_qos); + xfree(new_qos); + mod_user.qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; + changed = 1; + } } if(user->admin_level != file_opts->admin) { @@ -799,6 +901,9 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, &user_cond, &mod_user); notice_thread_fini(); + + if(mod_user.qos_list) + list_destroy(mod_user.qos_list); /* if(ret_list && list_count(ret_list)) { */ /* char *object = NULL; */ @@ -815,6 +920,7 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, set = 1; } } + xfree(def_acct); if((!user->coord_accts || !list_count(user->coord_accts)) && (file_opts->coord_list @@ -892,7 +998,7 @@ static int _mod_user(sacctmgr_file_opts_t *file_opts, } list_destroy(add_list); } - list_destroy(user_cond.user_list); + list_destroy(assoc_cond.user_list); return set; } @@ -1070,16 +1176,29 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, line = xstrdup_printf( "User - %s", sacctmgr_assoc->sort_name); if(user_rec) { - xstrfmtcat(line, ":DefaultAccount=%s", + xstrfmtcat(line, ":DefaultAccount='%s'", user_rec->default_acct); if(user_rec->admin_level > ACCT_ADMIN_NONE) - xstrfmtcat(line, ":AdminLevel=%s", + xstrfmtcat(line, ":AdminLevel='%s'", acct_admin_level_str( user_rec-> admin_level)); - if(user_rec->qos > ACCT_QOS_NORMAL) - xstrfmtcat(line, ":QOS=%s", - acct_qos_str(user_rec->qos)); + if(user_rec->qos_list + && list_count(user_rec->qos_list)) { + char *temp_char = NULL; + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } + temp_char = get_qos_complete_str( + qos_list, user_rec->qos_list); + xstrfmtcat(line, ":QOS='%s'", + temp_char); + xfree(temp_char); + } + if(user_rec->coord_accts && list_count(user_rec->coord_accts)) { ListIterator itr2 = NULL; @@ -1099,7 +1218,7 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, xstrfmtcat( line, ":Coordinator" - "=%s", + "='%s", coord->name); first_coord = 0; } else { @@ -1107,6 +1226,8 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, coord->name); } } + if(!first_coord) + xstrcat(line, "'"); list_iterator_destroy(itr2); } } @@ -1120,9 +1241,15 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, acct_rec->description); xstrfmtcat(line, ":Organization='%s'", acct_rec->organization); - if(acct_rec->qos > ACCT_QOS_NORMAL) - xstrfmtcat(line, ":QOS=%s", - acct_qos_str(acct_rec->qos)); + if(acct_rec->qos_list) { + char *temp_char = get_qos_complete_str( + qos_list, acct_rec->qos_list); + if(temp_char) { + xstrfmtcat(line, ":QOS='%s'", + temp_char); + xfree(temp_char); + } + } } } 
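/* The add-only QOS merge above is written out twice, once for accounts in
 * _mod_acct() and once for users in _mod_user(). A minimal sketch of the
 * same idea as a stand-alone helper, assuming only the List and xstring
 * calls already used in this patch; the helper name _qos_names_not_in is
 * invented for illustration and is not part of the change itself. */
static List _qos_names_not_in(List cur_qos_list, List new_qos_list)
{
	List missing = list_create(slurm_destroy_char);
	ListIterator new_itr = list_iterator_create(new_qos_list);
	ListIterator cur_itr = list_iterator_create(cur_qos_list);
	char *new_qos = NULL, *cur_qos = NULL;

	while ((new_qos = list_next(new_itr))) {
		while ((cur_qos = list_next(cur_itr))) {
			if (!strcmp(new_qos, cur_qos))
				break;
		}
		list_iterator_reset(cur_itr);
		if (!cur_qos)	/* name not found in the current list */
			list_append(missing, xstrdup(new_qos));
	}
	list_iterator_destroy(new_itr);
	list_iterator_destroy(cur_itr);

	if (!list_count(missing)) {	/* nothing new to add */
		list_destroy(missing);
		return NULL;
	}
	return missing;	/* entries are freed via slurm_destroy_char */
}
/* Returning NULL for the "nothing missing" case mirrors the branches above,
 * which only set changed and keep mod_acct.qos_list / mod_user.qos_list when
 * at least one new QOS name was found. */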
if(sacctmgr_assoc->assoc->partition) @@ -1153,7 +1280,8 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, if(fprintf(fd, "%s\n", line) < 0) { - error("Can't write to file"); + exit_code=1; + fprintf(stderr, " Can't write to file"); return SLURM_ERROR; } info("%s", line); @@ -1242,7 +1370,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) fd = fopen(argv[0], "r"); if (fd == NULL) { - printf(" error: Unable to read \"%s\": %m\n", argv[0]); + exit_code=1; + fprintf(stderr, " Unable to read \"%s\": %m\n", argv[0]); return; } @@ -1253,13 +1382,12 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) user_cond.with_coords = 1; curr_user_list = acct_storage_g_get_users(db_conn, &user_cond); - /* This will be freed in their local counter parts */ - acct_list = list_create(NULL); - acct_assoc_list = list_create(NULL); - user_list = list_create(NULL); - user_assoc_list = list_create(NULL); - /* These are new info so they need to be freed here */ + acct_list = list_create(destroy_acct_account_rec); + acct_assoc_list = list_create(destroy_acct_association_rec); + user_list = list_create(destroy_acct_user_rec); + user_assoc_list = list_create(destroy_acct_association_rec); + mod_acct_list = list_create(destroy_acct_account_rec); mod_user_list = list_create(destroy_acct_user_rec); mod_assoc_list = list_create(destroy_acct_association_rec); @@ -1290,16 +1418,19 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) } } if(!object[0]) { - printf(" error: Misformatted line(%d): %s\n", lc, line); + exit_code=1; + fprintf(stderr, " Misformatted line(%d): %s\n", + lc, line); rc = SLURM_ERROR; break; } while(line[start] != ' ' && start<len) start++; if(start>=len) { - printf(" error: Nothing after object " - "name '%s'. line(%d)\n", - object, lc); + exit_code=1; + fprintf(stderr, " Nothing after object " + "name '%s'. 
line(%d)\n", + object, lc); rc = SLURM_ERROR; break; @@ -1311,7 +1442,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) acct_association_cond_t assoc_cond; if(cluster_name) { - printf(" You can only add one cluster " + exit_code=1; + fprintf(stderr, " You can only add one cluster " "at a time.\n"); rc = SLURM_ERROR; break; @@ -1320,7 +1452,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts = _parse_options(line+start); if(!file_opts) { - printf(" error: Problem with line(%d)\n", lc); + exit_code=1; + fprintf(stderr, + " error: Problem with line(%d)\n", lc); rc = SLURM_ERROR; break; } @@ -1352,8 +1486,11 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) list_destroy(cluster_list); if(rc != SLURM_SUCCESS) { - printf(" Problem adding machine\n"); + exit_code=1; + fprintf(stderr, + " Problem adding machine\n"); rc = SLURM_ERROR; + _destroy_sacctmgr_file_opts(file_opts); break; } set = 1; @@ -1371,16 +1508,18 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) list_destroy(assoc_cond.cluster_list); if(!curr_assoc_list) { - printf(" Problem getting associations " - "for this cluster\n"); + exit_code=1; + fprintf(stderr, " Problem getting associations " + "for this cluster\n"); rc = SLURM_ERROR; break; } //info("got %d assocs", list_count(curr_assoc_list)); continue; } else if(!cluster_name) { - printf(" error: You need to specify a cluster name " - "first with 'Cluster - $NAME' in your file\n"); + exit_code=1; + fprintf(stderr, " You need to specify a cluster name " + "first with 'Cluster - $NAME' in your file\n"); break; } @@ -1392,8 +1531,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) i++; if(i >= len) { - printf(" error: No parent name " - "given line(%d)\n", + exit_code=1; + fprintf(stderr, " No parent name " + "given line(%d)\n", lc); rc = SLURM_ERROR; break; @@ -1404,7 +1544,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) curr_assoc_list, parent, cluster_name) && !sacctmgr_find_account_base_assoc_from_list( acct_assoc_list, parent, cluster_name)) { - printf(" error: line(%d) You need to add " + exit_code=1; + fprintf(stderr, " line(%d) You need to add " "this parent (%s) as a child before " "you can add childern to it.\n", lc, parent); @@ -1423,7 +1564,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts = _parse_options(line+start); if(!file_opts) { - printf(" error: Problem with line(%d)\n", lc); + exit_code=1; + fprintf(stderr, " Problem with line(%d)\n", lc); rc = SLURM_ERROR; break; } @@ -1453,9 +1595,11 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) /* info("adding acct %s (%s) (%s)", */ /* acct->name, acct->description, */ /* acct->organization); */ - acct->qos = file_opts->qos; + acct->qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; list_append(acct_list, acct); - list_append(curr_acct_list, acct); + /* don't add anything to the + curr_acct_list */ assoc = xmalloc(sizeof(acct_association_rec_t)); assoc->acct = xstrdup(file_opts->name); @@ -1547,7 +1691,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts = _parse_options(line+start); if(!file_opts) { - printf(" error: Problem with line(%d)\n", lc); + exit_code=1; + fprintf(stderr, " Problem with line(%d)\n", lc); rc = SLURM_ERROR; break; } @@ -1564,22 +1709,26 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) else user->default_acct = xstrdup(parent); - user->qos = file_opts->qos; + user->qos_list = file_opts->qos_list; + file_opts->qos_list = NULL; 
user->admin_level = file_opts->admin; if(file_opts->coord_list) { acct_user_cond_t user_cond; + acct_association_cond_t assoc_cond; ListIterator coord_itr = NULL; char *temp_char = NULL; acct_coord_rec_t *coord = NULL; memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.user_list = - list_create(NULL); - - list_push(user_cond.user_list, - user->name); + memset(&assoc_cond, 0, + sizeof(acct_association_cond_t)); + assoc_cond.user_list = + list_create(NULL); + list_append(assoc_cond.user_list, + user->name); + user_cond.assoc_cond = &assoc_cond; notice_thread_init(); rc = acct_storage_g_add_coord( @@ -1587,7 +1736,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) file_opts->coord_list, &user_cond); notice_thread_fini(); - list_destroy(user_cond.user_list); + list_destroy(assoc_cond.user_list); user->coord_accts = list_create( destroy_acct_coord_rec); coord_itr = list_iterator_create( @@ -1706,7 +1855,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) _destroy_sacctmgr_file_opts(file_opts); continue; } else { - printf(" error: Misformatted line(%d): %s\n", lc, line); + exit_code=1; + fprintf(stderr, + " Misformatted line(%d): %s\n", lc, line); rc = SLURM_ERROR; break; } @@ -1718,7 +1869,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) START_TIMER; if(rc == SLURM_SUCCESS && list_count(acct_list)) { printf("Accounts\n"); - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Name,Description,Organization,QOS"); print_fields_list = _set_up_print_fields(format_list); @@ -1733,23 +1884,21 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) switch(field->type) { case PRINT_DESC: field->print_routine( - SLURM_PRINT_VALUE, field, acct->description); break; case PRINT_NAME: field->print_routine( - SLURM_PRINT_VALUE, field, acct->name); break; case PRINT_ORG: field->print_routine( - SLURM_PRINT_VALUE, field, acct->organization); break; case PRINT_QOS: field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str(acct->qos)); + field, + qos_list, + acct->qos_list); break; default: break; @@ -1775,7 +1924,8 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) if(rc == SLURM_SUCCESS && list_count(user_list)) { printf("Users\n"); - addto_char_list(format_list, "Name,Default,QOS,Admin,Coord"); + slurm_addto_char_list(format_list, + "Name,Default,QOS,Admin,Coord"); print_fields_list = _set_up_print_fields(format_list); list_flush(format_list); @@ -1788,30 +1938,29 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) switch(field->type) { case PRINT_ADMIN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, acct_admin_level_str( user->admin_level)); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, user->coord_accts); break; case PRINT_DACCT: field->print_routine( - SLURM_PRINT_VALUE, field, + field, user->default_acct); break; case PRINT_NAME: field->print_routine( - SLURM_PRINT_VALUE, field, user->name); break; case PRINT_QOS: field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str(user->qos)); + field, + qos_list, + user->qos_list); break; default: break; @@ -1851,14 +2000,18 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) printf(" Nothing new added.\n"); } } else { - printf(" error: Problem with requests.\n"); + exit_code=1; + fprintf(stderr, " Problem with requests.\n"); } list_destroy(format_list); + list_destroy(mod_acct_list); list_destroy(acct_list); list_destroy(acct_assoc_list); + list_destroy(mod_user_list); list_destroy(user_list); 
list_destroy(user_assoc_list); + list_destroy(mod_assoc_list); if(curr_acct_list) list_destroy(curr_acct_list); if(curr_assoc_list) diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c new file mode 100644 index 0000000000000000000000000000000000000000..95a4661923b88601057f71f310b6eb7a9fd45f2e --- /dev/null +++ b/src/sacctmgr/qos_functions.c @@ -0,0 +1,414 @@ +/*****************************************************************************\ + * qos_functions.c - functions dealing with qoss in the + * accounting system. + ***************************************************************************** + * Copyright (C) 2002-2008 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Danny Auble <da@llnl.gov> + * LLNL-CODE-402394. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#include "src/sacctmgr/sacctmgr.h" + +static int _set_cond(int *start, int argc, char *argv[], + acct_qos_cond_t *qos_cond, + List format_list) +{ + int i; + int set = 0; + int end = 0; + + if(!qos_cond) { + error("No qos_cond given"); + return -1; + } + + for (i=(*start); i<argc; i++) { + end = parse_option_end(argv[i]); + if (!strncasecmp (argv[i], "Set", 3)) { + i--; + } else if (!strncasecmp (argv[i], "WithDeleted", 5)) { + qos_cond->with_deleted = 1; + } else if(!end && !strncasecmp(argv[i], "where", 5)) { + continue; + } else if(!end + || !strncasecmp (argv[i], "Names", 1) + || !strncasecmp (argv[i], "QOS", 1)) { + if(!qos_cond->name_list) { + qos_cond->name_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(qos_cond->name_list, + argv[i]+end)) + set = 1; + } else if(!strncasecmp (argv[i], "Descriptions", 1)) { + if(!qos_cond->description_list) { + qos_cond->description_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(qos_cond->description_list, + argv[i]+end)) + set = 1; + } else if(!strncasecmp (argv[i], "Ids", 1)) { + if(!qos_cond->id_list) { + qos_cond->id_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(qos_cond->id_list, + argv[i]+end)) + set = 1; + } else { + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" + " Use keyword 'set' to modify " + "value\n", argv[i]); + } + } + + (*start) = i; + + return set; +} + +/* static int _set_rec(int *start, int argc, char *argv[], */ +/* acct_qos_rec_t *qos) */ +/* { */ +/* int i; */ +/* int set = 0; */ +/* int end = 0; */ + +/* for (i=(*start); i<argc; i++) { */ +/* end = parse_option_end(argv[i]); */ +/* if (!strncasecmp (argv[i], "Where", 5)) { */ +/* i--; */ +/* break; */ +/* } else if(!end && !strncasecmp(argv[i], "set", 3)) { */ +/* continue; */ +/* } else if(!end) { */ +/* printf(" Bad format on %s: End your option with " */ +/* "an '=' sign\n", argv[i]); */ +/* } else if (!strncasecmp (argv[i], "Description", 1)) { */ +/* if(!qos->description) */ +/* qos->description = */ +/* strip_quotes(argv[i]+end, NULL); */ +/* set = 1; */ +/* } else if (!strncasecmp (argv[i], "Name", 1)) { */ +/* if(!qos->name) */ +/* qos->name = strip_quotes(argv[i]+end, NULL); */ +/* set = 1; */ +/* } else { */ +/* printf(" Unknown option: %s\n" */ +/* " Use keyword 'where' to modify condition\n", */ +/* argv[i]); */ +/* } */ +/* } */ + +/* (*start) = i; */ + +/* return set; */ +/* } */ + +extern int sacctmgr_add_qos(int argc, char *argv[]) +{ + int rc = SLURM_SUCCESS; + int i=0; + ListIterator itr = NULL; + acct_qos_rec_t *qos = NULL; + List name_list = list_create(slurm_destroy_char); + char *description = NULL; + char *name = NULL; + List qos_list = NULL; + List local_qos_list = NULL; + char *qos_str = NULL; + + for (i=0; i<argc; i++) { + int end = parse_option_end(argv[i]); + if(!end || !strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(name_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Description", 1)) { + description = strip_quotes(argv[i]+end, NULL); + } else { + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); + } + } + + if(exit_code) { + list_destroy(name_list); + xfree(description); + return SLURM_ERROR; + } else if(!list_count(name_list)) { + list_destroy(name_list); + xfree(description); + exit_code=1; + fprintf(stderr, " Need name of qos to add.\n"); + return SLURM_SUCCESS; + } + + + local_qos_list = acct_storage_g_get_qos(db_conn,
NULL); + + if(!local_qos_list) { + exit_code=1; + fprintf(stderr, " Problem getting qos's from database. " + "Contact your admin.\n"); + list_destroy(name_list); + xfree(description); + return SLURM_ERROR; + } + + qos_list = list_create(destroy_acct_qos_rec); + + itr = list_iterator_create(name_list); + while((name = list_next(itr))) { + qos = NULL; + if(!sacctmgr_find_qos_from_list(local_qos_list, name)) { + qos = xmalloc(sizeof(acct_qos_rec_t)); + qos->name = xstrdup(name); + if(description) + qos->description = xstrdup(description); + else + qos->description = xstrdup(name); + + xstrfmtcat(qos_str, " %s\n", name); + list_append(qos_list, qos); + } + } + list_iterator_destroy(itr); + list_destroy(local_qos_list); + list_destroy(name_list); + + if(!list_count(qos_list)) { + printf(" Nothing new added.\n"); + goto end_it; + } + + if(qos_str) { + printf(" Adding QOS(s)\n%s", qos_str); + printf(" Settings\n"); + if(description) + printf(" Description = %s\n", description); + else + printf(" Description = %s\n", "QOS Name"); + xfree(qos_str); + } + + notice_thread_init(); + if(list_count(qos_list)) + rc = acct_storage_g_add_qos(db_conn, my_uid, qos_list); + else + goto end_it; + + notice_thread_fini(); + + if(rc == SLURM_SUCCESS) { + if(commit_check("Would you like to commit changes?")) { + acct_storage_g_commit(db_conn, 1); + } else { + printf(" Changes Discarded\n"); + acct_storage_g_commit(db_conn, 0); + } + } else { + exit_code=1; + fprintf(stderr, " Problem adding QOS.\n"); + rc = SLURM_ERROR; + } + +end_it: + list_destroy(qos_list); + xfree(description); + + return rc; +} + +extern int sacctmgr_list_qos(int argc, char *argv[]) +{ + int rc = SLURM_SUCCESS; + acct_qos_cond_t *qos_cond = xmalloc(sizeof(acct_qos_cond_t)); + int i=0; + ListIterator itr = NULL; + ListIterator itr2 = NULL; + acct_qos_rec_t *qos = NULL; + char *object; + List qos_list = NULL; + + print_field_t *field = NULL; + + List format_list = list_create(slurm_destroy_char); + List print_fields_list; /* types are of print_field_t */ + + enum { + PRINT_DESC, + PRINT_ID, + PRINT_NAME + }; + + _set_cond(&i, argc, argv, qos_cond, format_list); + + if(exit_code) { + destroy_acct_qos_cond(qos_cond); + list_destroy(format_list); + return SLURM_ERROR; + } else if(!list_count(format_list)) { + slurm_addto_char_list(format_list, "N"); + } + + print_fields_list = list_create(destroy_print_field); + + itr = list_iterator_create(format_list); + while((object = list_next(itr))) { + field = xmalloc(sizeof(print_field_t)); + if(!strncasecmp("Description", object, 1)) { + field->type = PRINT_DESC; + field->name = xstrdup("Descr"); + field->len = 20; + field->print_routine = print_fields_str; + } else if(!strncasecmp("ID", object, 1)) { + field->type = PRINT_ID; + field->name = xstrdup("ID"); + field->len = 6; + field->print_routine = print_fields_uint; + } else if(!strncasecmp("Name", object, 1)) { + field->type = PRINT_NAME; + field->name = xstrdup("NAME"); + field->len = 10; + field->print_routine = print_fields_str; + } else { + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); + xfree(field); + continue; + } + list_append(print_fields_list, field); + } + list_iterator_destroy(itr); + list_destroy(format_list); + + if(exit_code) { + list_destroy(print_fields_list); + return SLURM_ERROR; + } + qos_list = acct_storage_g_get_qos(db_conn, qos_cond); + destroy_acct_qos_cond(qos_cond); + + if(!qos_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } +
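/* Every _set_cond()/_set_rec() added in this patch leans on
 * parse_option_end() plus strncasecmp() prefix matching. A rough sketch of
 * the behaviour those callers assume (an offset just past the first '=',
 * or 0 when the token has no '='); this is an illustration only, not the
 * real helper declared in sacctmgr.h. */
static int parse_option_end_sketch(const char *option)
{
	int i;

	if (!option)
		return 0;
	for (i = 0; option[i]; i++) {
		if (option[i] == '=')
			return i + 1;	/* option + end points at the value */
	}
	return 0;	/* no '=': callers treat this as a bare word */
}
/* For example "Names=normal,standby" gives end = 6, and argv[i]+end
 * ("normal,standby") is handed to slurm_addto_char_list() to be split into
 * list entries; _set_rec() in user_functions.c looks at the two characters
 * before that offset to recognize the "+=" and "-=" forms. */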
itr = list_iterator_create(qos_list); + itr2 = list_iterator_create(print_fields_list); + print_fields_header(print_fields_list); + + while((qos = list_next(itr))) { + while((field = list_next(itr2))) { + switch(field->type) { + case PRINT_DESC: + field->print_routine( + field, qos->description); + break; + case PRINT_ID: + field->print_routine( + field, qos->id); + break; + case PRINT_NAME: + field->print_routine( + field, qos->name); + break; + default: + break; + } + } + list_iterator_reset(itr2); + printf("\n"); + } + list_iterator_destroy(itr2); + list_iterator_destroy(itr); + list_destroy(qos_list); + list_destroy(print_fields_list); + + return rc; +} + +extern int sacctmgr_delete_qos(int argc, char *argv[]) +{ + int rc = SLURM_SUCCESS; + acct_qos_cond_t *qos_cond = + xmalloc(sizeof(acct_qos_cond_t)); + int i=0; + List ret_list = NULL; + int set = 0; + + if(!(set = _set_cond(&i, argc, argv, qos_cond, NULL))) { + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); + destroy_acct_qos_cond(qos_cond); + return SLURM_ERROR; + } else if(set == -1) { + destroy_acct_qos_cond(qos_cond); + return SLURM_ERROR; + } + + notice_thread_init(); + ret_list = acct_storage_g_remove_qos(db_conn, my_uid, qos_cond); + notice_thread_fini(); + destroy_acct_qos_cond(qos_cond); + + if(ret_list && list_count(ret_list)) { + char *object = NULL; + ListIterator itr = list_iterator_create(ret_list); + printf(" Deleting QOS(s)...\n"); + + while((object = list_next(itr))) { + printf(" %s\n", object); + } + list_iterator_destroy(itr); + if(commit_check("Would you like to commit changes?")) { + acct_storage_g_commit(db_conn, 1); + } else { + printf(" Changes Discarded\n"); + acct_storage_g_commit(db_conn, 0); + } + } else if(ret_list) { + printf(" Nothing deleted\n"); + } else { + exit_code=1; + fprintf(stderr, " Error with request\n"); + rc = SLURM_ERROR; + } + + if(ret_list) + list_destroy(ret_list); + + return rc; +} diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index cbdc03aa6b8404addb422d241abf737c797c7131..fe53cf9a06f243b9de5572be9b68046ce6efd6b5 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -51,6 +51,7 @@ int exit_flag; /* program to terminate if =1 */ int input_words; /* number of words of input permitted */ int one_liner; /* one record per line if =1 */ int quiet_flag; /* quiet=1, verbose=-1, normal=0 */ +int verbosity; /* count of -v options */ int rollback_flag; /* immediate execute=1, else = 0 */ int with_assoc_flag = 0; void *db_conn = NULL; @@ -71,6 +72,7 @@ main (int argc, char *argv[]) int error_code = SLURM_SUCCESS, i, opt_char, input_field_count; char **input_fields; log_options_t opts = LOG_OPTS_STDERR_ONLY ; + int local_exit_code = 0; int option_index; static struct option long_options[] = { @@ -96,6 +98,7 @@ main (int argc, char *argv[]) exit_flag = 0; input_field_count = 0; quiet_flag = 0; + verbosity = 0; log_init("sacctmgr", opts, SYSLOG_FACILITY_DAEMON, NULL); if (getenv ("SACCTMGR_ALL")) @@ -139,6 +142,7 @@ main (int argc, char *argv[]) break; case (int)'v': quiet_flag = -1; + verbosity++; break; case (int)'V': _print_version(); @@ -163,6 +167,12 @@ main (int argc, char *argv[]) } } + if (verbosity) { + opts.stderr_level += verbosity; + opts.prefix_level = 1; + log_alter(opts, 0, NULL); + } + db_conn = acct_storage_g_get_connection(false, rollback_flag); my_uid = getuid(); @@ -176,8 +186,17 @@ main (int argc, char *argv[]) if (error_code || exit_flag) break; error_code = _get_command (&input_field_count, input_fields); 
+ /* This is here so if someone made a mistake we allow + * them to fix it and let the process happen since there + * are checks for global exit_code we need to reset it. + */ + if(exit_code) { + local_exit_code = exit_code; + exit_code = 0; + } } - + if(local_exit_code) + exit_code = local_exit_code; acct_storage_g_close_connection(&db_conn); slurm_acct_storage_fini(); exit(exit_code); @@ -454,23 +473,25 @@ static void _add_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; /* First identify the entity to add */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_add_user((argc - 1), &argv[1]); + if (strncasecmp (argv[0], "Account", 1) == 0) { + error_code = sacctmgr_add_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 2) == 0) { error_code = sacctmgr_add_cluster((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Coordinator", 2) == 0) { error_code = sacctmgr_add_coord((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 1) == 0) { - error_code = sacctmgr_add_account((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "QOS", 1) == 0) { + error_code = sacctmgr_add_qos((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_add_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in add command\n"); fprintf(stderr, "Input line must include, "); fprintf(stderr, "\"User\", \"Account\", \"Coordinator\", "); - fprintf(stderr, "or \"Cluster\"\n"); + fprintf(stderr, "\"Cluster\", or \"QOS\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -479,29 +500,35 @@ static void _add_it (int argc, char *argv[]) * _show_it - list the slurm configuration per the supplied arguments * IN argc - count of arguments * IN argv - list of arguments + * undocumented association options wopi and wopl + * without parent info and without parent limits */ static void _show_it (int argc, char *argv[]) { int error_code = SLURM_SUCCESS; /* First identify the entity to list */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_list_user((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 2) == 0) { + if (strncasecmp (argv[0], "Account", 2) == 0) { error_code = sacctmgr_list_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Association", 2) == 0) { error_code = sacctmgr_list_association((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 1) == 0) { error_code = sacctmgr_list_cluster((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "QOS", 1) == 0) { + error_code = sacctmgr_list_qos((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "Transactions", 1) == 0) { + error_code = sacctmgr_list_txn((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_list_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in list command\n"); fprintf(stderr, "Input line must include "); fprintf(stderr, "\"User\", \"Account\", \"Association\", "); - fprintf(stderr, "or \"Cluster\"\n"); + fprintf(stderr, "\"Cluster\", or \"QOS\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -517,12 +544,12 @@ static void _modify_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; /* First identify the entity to modify */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_modify_user((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 1) 
== 0) { + if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_modify_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 1) == 0) { error_code = sacctmgr_modify_cluster((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_modify_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in modify command\n"); @@ -531,7 +558,7 @@ static void _modify_it (int argc, char *argv[]) fprintf(stderr, "or \"Cluster\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -546,23 +573,25 @@ static void _delete_it (int argc, char *argv[]) int error_code = SLURM_SUCCESS; /* First identify the entity to delete */ - if (strncasecmp (argv[0], "User", 1) == 0) { - error_code = sacctmgr_delete_user((argc - 1), &argv[1]); - } else if (strncasecmp (argv[0], "Account", 1) == 0) { + if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_delete_account((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Cluster", 2) == 0) { error_code = sacctmgr_delete_cluster((argc - 1), &argv[1]); } else if (strncasecmp (argv[0], "Coordinator", 2) == 0) { error_code = sacctmgr_delete_coord((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "QOS", 2) == 0) { + error_code = sacctmgr_delete_qos((argc - 1), &argv[1]); + } else if (strncasecmp (argv[0], "User", 1) == 0) { + error_code = sacctmgr_delete_user((argc - 1), &argv[1]); } else { exit_code = 1; fprintf(stderr, "No valid entity in delete command\n"); fprintf(stderr, "Input line must include "); fprintf(stderr, "\"User\", \"Account\", \"Coordinator\", "); - fprintf(stderr, "or \"Cluster\"\n"); + fprintf(stderr, "\"Cluster\", or \"QOS\"\n"); } - if (error_code) { + if (error_code == SLURM_ERROR) { exit_code = 1; } } @@ -611,7 +640,8 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ version display tool version number. \n\ !! Repeat the last command entered. \n\ \n\ - <ENTITY> may be \"cluster\", \"account\", \"user\", of \"coordinator\". \n\ + <ENTITY> may be \"account\", \"association\", \"cluster\", \n\ + \"coordinator\", \"qos\", \"transaction\", or \"user\". \n\ \n\ <SPECS> are different for each command entity pair. \n\ list account - Clusters=, Descriptions=, Format=, Names=, \n\ @@ -638,6 +668,16 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ (where options) Names= \n\ delete cluster - Names= \n\ \n\ + add coordinator - Accounts=, and Names= \n\ + delete coordinator - Accounts=, and Names= \n\ + \n\ + list qos - Descriptions=, Ids=, Names=, and WithDeleted \n\ + add qos - Description=, and Names= \n\ + delete qos - Descriptions=, Ids=, and Names= \n\ + \n\ + list transactions - Actor=, EndTime, \n\ + Format=, ID=, and Start= \n\ + \n\ list user - AdminLevel=, DefaultAccounts=, Format=, Names=,\n\ QosLevel=, and WithAssocs \n\ add user - Accounts=, AdminLevel=, Clusters=, \n\ @@ -653,8 +693,24 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ delete user - Accounts=, AdminLevel=, Clusters=, \n\ DefaultAccounts=, and Names= \n\ \n\ - add coordinator - Accounts=, and Names= \n\ - delete coordinator - Accounts=, and Names= \n\ + Format options are different for listing each entity pair. 
\n\ + \n\ + Account - Account, Cluster, CoordinatorList, \n\ + Description, Organization, QOS, QOSRAW \n\ + \n\ + Association - Account, Cluster, Fairshare, ID, LFT, \n\ + MaxCPUSecs, MaxJobs, MaxNodes, MaxWall, \n\ + ParentID, ParentName, Partition, RGT, User \n\ + \n\ + Cluster - Cluster, ControlHost, ControlPort, Fairshare \n\ + MaxCPUSecs, MaxJobs, MaxNodes, MaxWall \n\ + \n\ + QOS - Description, ID, Name \n\ + \n\ + Transactions - Action, Actor, ID, Info, TimeStamp, Where \n\ + \n\ + User - Account, AdminLevel, Cluster, CoordinatorList, \n\ + DefaultAccount, QOS, QOSRAW, User \n\ \n\ \n\ All commands entitys, and options are case-insensitive. \n\n"); diff --git a/src/sacctmgr/sacctmgr.h b/src/sacctmgr/sacctmgr.h index 5a23297855e9cf156c88d26a95bd9dcf38ffaa5e..b935efa51b0c21ca75837f7e8c8f3d3d19d3aee3 100644 --- a/src/sacctmgr/sacctmgr.h +++ b/src/sacctmgr/sacctmgr.h @@ -109,11 +109,13 @@ extern int sacctmgr_add_user(int argc, char *argv[]); extern int sacctmgr_add_account(int argc, char *argv[]); extern int sacctmgr_add_cluster(int argc, char *argv[]); extern int sacctmgr_add_coord(int argc, char *argv[]); +extern int sacctmgr_add_qos(int argc, char *argv[]); extern int sacctmgr_list_association(int argc, char *argv[]); extern int sacctmgr_list_user(int argc, char *argv[]); extern int sacctmgr_list_account(int argc, char *argv[]); extern int sacctmgr_list_cluster(int argc, char *argv[]); +extern int sacctmgr_list_qos(int argc, char *argv[]); extern int sacctmgr_modify_association(int argc, char *argv[]); extern int sacctmgr_modify_user(int argc, char *argv[]); @@ -125,6 +127,7 @@ extern int sacctmgr_delete_user(int argc, char *argv[]); extern int sacctmgr_delete_account(int argc, char *argv[]); extern int sacctmgr_delete_cluster(int argc, char *argv[]); extern int sacctmgr_delete_coord(int argc, char *argv[]); +extern int sacctmgr_delete_qos(int argc, char *argv[]); /* this has pointers to assoc_list so do not destroy assoc_list before * using the list returned from this function. 
@@ -137,15 +140,19 @@ extern int sacctmgr_dump_cluster(int argc, char *argv[]); extern void destroy_sacctmgr_assoc(void *object); extern int parse_option_end(char *option); extern char *strip_quotes(char *option, int *increased); -extern void addto_char_list(List char_list, char *names); extern int notice_thread_init(); extern int notice_thread_fini(); extern int commit_check(char *warning); extern int get_uint(char *in_value, uint32_t *out_value, char *type); -extern void sacctmgr_print_coord_list(type_t type, print_field_t *field, - List value); +extern int addto_qos_char_list(List char_list, List qos_list, char *names, + int option); +extern void sacctmgr_print_coord_list(print_field_t *field, List value); +extern void sacctmgr_print_qos_list(print_field_t *field, List qos_list, + List value); +extern char *get_qos_complete_str(List qos_list, List num_qos_list); extern int sort_coord_list(acct_coord_rec_t *coord_a, acct_coord_rec_t *coord_b); +extern int sort_char_list(char *name_a, char *name_b); /* you need to free the objects returned from these functions */ extern acct_association_rec_t *sacctmgr_find_association(char *user, @@ -168,6 +175,8 @@ extern acct_association_rec_t *sacctmgr_find_association_from_list( char *cluster, char *partition); extern acct_association_rec_t *sacctmgr_find_account_base_assoc_from_list( List assoc_list, char *account, char *cluster); +extern acct_qos_rec_t *sacctmgr_find_qos_from_list( + List qos_list, char *name); extern acct_user_rec_t *sacctmgr_find_user_from_list( List user_list, char *name); extern acct_account_rec_t *sacctmgr_find_account_from_list( @@ -184,4 +193,7 @@ extern int print_file_sacctmgr_assoc_list(FILE *fd, extern void load_sacctmgr_cfg_file (int argc, char *argv[]); +/* txn_functions.c */ +extern int sacctmgr_list_txn(int argc, char *argv[]); + #endif diff --git a/src/sacctmgr/txn_functions.c b/src/sacctmgr/txn_functions.c new file mode 100644 index 0000000000000000000000000000000000000000..b98286b01d77331c41951c3e44677b2d7c9a6e42 --- /dev/null +++ b/src/sacctmgr/txn_functions.c @@ -0,0 +1,242 @@ +/*****************************************************************************\ + * txn_functions.c - functions dealing with transactions in the + * accounting system. + ***************************************************************************** + * Copyright (C) 2008 Lawrence Livermore National Security. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Danny Auble <da@llnl.gov> + * LLNL-CODE-402394. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. 
If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +\*****************************************************************************/ + +#include "src/sacctmgr/sacctmgr.h" +#include "src/common/slurmdbd_defs.h" + +static int _set_cond(int *start, int argc, char *argv[], + acct_txn_cond_t *txn_cond, + List format_list) +{ + int i, end = 0; + int set = 0; + + for (i=(*start); i<argc; i++) { + end = parse_option_end(argv[i]); + if(!end && !strncasecmp(argv[i], "where", 5)) { + continue; + } else if(!end + || (!strncasecmp (argv[i], "Id", 1)) + || (!strncasecmp (argv[i], "Txn", 1))) { + if(!txn_cond->id_list) + txn_cond->id_list = + list_create(slurm_destroy_char); + + if(slurm_addto_char_list(txn_cond->id_list, + argv[i]+end)) + set = 1; + } else if (!strncasecmp (argv[i], "Action", 4)) { + /* FIX ME! fill this in */ +/* if(!txn_cond->action_list) */ +/* txn_cond->action_list = */ +/* list_create(slurm_destroy_char); */ + +/* if(slurm_addto_char_list(txn_cond->action_list, */ +/* argv[i]+end)) */ +/* set = 1; */ + } else if (!strncasecmp (argv[i], "Actors", 4) + || !strncasecmp (argv[i], "User", 1)) { + if(!txn_cond->actor_list) + txn_cond->actor_list = + list_create(slurm_destroy_char); + if(slurm_addto_char_list(txn_cond->actor_list, + argv[i]+end)) + set = 1; + } else if (!strncasecmp (argv[i], "End", 1)) { + txn_cond->time_end = parse_time(argv[i]+end); + set = 1; + } else if (!strncasecmp (argv[i], "Format", 1)) { + if(format_list) + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Start", 1)) { + txn_cond->time_start = parse_time(argv[i]+end); + set = 1; + } else { + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n", argv[i]); + } + } + (*start) = i; + + return set; +} + + +extern int sacctmgr_list_txn(int argc, char *argv[]) +{ + int rc = SLURM_SUCCESS; + acct_txn_cond_t *txn_cond = xmalloc(sizeof(acct_txn_cond_t)); + List txn_list = NULL; + acct_txn_rec_t *txn = NULL; + int i=0; + ListIterator itr = NULL; + ListIterator itr2 = NULL; + char *object = NULL; + + print_field_t *field = NULL; + + List format_list = list_create(slurm_destroy_char); + List print_fields_list; /* types are of print_field_t */ + + enum { + PRINT_ACTION, + PRINT_ACTOR, + PRINT_ID, + PRINT_INFO, + PRINT_TS, + PRINT_WHERE + }; + + _set_cond(&i, argc, argv, txn_cond, format_list); + + if(exit_code) { + destroy_acct_txn_cond(txn_cond); + list_destroy(format_list); + return SLURM_ERROR; + } + + print_fields_list = list_create(destroy_print_field); + + if(!list_count(format_list)) + slurm_addto_char_list(format_list, "T,Action,Actor,Where,Info"); + + itr = list_iterator_create(format_list); + while((object = list_next(itr))) { + field = xmalloc(sizeof(print_field_t)); + if(!strncasecmp("Action", object, 4)) { + field->type = PRINT_ACTION; + field->name = xstrdup("Action"); + field->len = 20; + field->print_routine = print_fields_str; + } else if(!strncasecmp("Actor", object, 4)) { + 
field->type = PRINT_ACTOR; + field->name = xstrdup("Actor"); + field->len = 10; + field->print_routine = print_fields_str; + } else if(!strncasecmp("ID", object, 2)) { + field->type = PRINT_ID; + field->name = xstrdup("ID"); + field->len = 6; + field->print_routine = print_fields_uint; + } else if(!strncasecmp("Info", object, 2)) { + field->type = PRINT_INFO; + field->name = xstrdup("Info"); + field->len = 20; + field->print_routine = print_fields_str; + } else if(!strncasecmp("TimeStamp", object, 1)) { + field->type = PRINT_TS; + field->name = xstrdup("Time"); + field->len = 15; + field->print_routine = print_fields_date; + } else if(!strncasecmp("Where", object, 1)) { + field->type = PRINT_WHERE; + field->name = xstrdup("Where"); + field->len = 20; + field->print_routine = print_fields_str; + } else { + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); + xfree(field); + continue; + } + list_append(print_fields_list, field); + } + list_iterator_destroy(itr); + list_destroy(format_list); + + if(exit_code) { + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + txn_list = acct_storage_g_get_txn(db_conn, txn_cond); + destroy_acct_txn_cond(txn_cond); + + if(!txn_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + itr = list_iterator_create(txn_list); + itr2 = list_iterator_create(print_fields_list); + print_fields_header(print_fields_list); + + while((txn = list_next(itr))) { + while((field = list_next(itr2))) { + switch(field->type) { + case PRINT_ACTION: + field->print_routine( + field, + slurmdbd_msg_type_2_str(txn->action)); + break; + case PRINT_ACTOR: + field->print_routine(field, + txn->actor_name); + break; + case PRINT_ID: + field->print_routine(field, + txn->id); + break; + case PRINT_INFO: + field->print_routine(field, + txn->set_info); + break; + case PRINT_TS: + field->print_routine(field, + txn->timestamp); + break; + case PRINT_WHERE: + field->print_routine(field, + txn->where_query); + break; + default: + break; + } + } + list_iterator_reset(itr2); + printf("\n"); + } + + list_iterator_destroy(itr2); + list_iterator_destroy(itr); + list_destroy(txn_list); + list_destroy(print_fields_list); + return rc; +} diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index e30becaf55937e775f7965192b20cc2d4aa3d8d2..acaf69df75e1a5f729cd957b97bc8b10d79def49 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -46,60 +46,113 @@ static int _set_cond(int *start, int argc, char *argv[], int u_set = 0; int a_set = 0; int end = 0; + List qos_list = NULL; + + if(!user_cond) { + error("No user_cond given"); + return -1; + } + + if(!user_cond->assoc_cond) { + user_cond->assoc_cond = + xmalloc(sizeof(acct_association_cond_t)); + user_cond->assoc_cond->fairshare = NO_VAL; + user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; + user_cond->assoc_cond->max_jobs = NO_VAL; + user_cond->assoc_cond->max_nodes_per_job = NO_VAL; + user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; + } for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; - } else if (!end && strncasecmp (argv[i], "WithAssoc", 5) == 0) { + } else if (!end && !strncasecmp (argv[i], "WithAssoc", 5)) { user_cond->with_assocs = 1; - } else if (strncasecmp (argv[i], "WithCoordinators", 5) == 0) { + } else if (!strncasecmp (argv[i], "WithCoordinators", 5)) { 
user_cond->with_coords = 1; } else if(!end && !strncasecmp(argv[i], "where", 5)) { continue; - } else if(!end) { - addto_char_list(user_cond->user_list, argv[i]); - addto_char_list(user_cond->assoc_cond->user_list, - argv[i]); - u_set = 1; - } else if (strncasecmp (argv[i], "Account", 2) == 0) { - addto_char_list(user_cond->assoc_cond->acct_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "AdminLevel", 2) == 0) { + } else if(!end + || !strncasecmp (argv[i], "Names", 1) + || !strncasecmp (argv[i], "Users", 1)) { + if(!user_cond->assoc_cond->user_list) { + user_cond->assoc_cond->user_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->user_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Account", 2)) { + if(!user_cond->assoc_cond->acct_list) { + user_cond->assoc_cond->acct_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->acct_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "AdminLevel", 2)) { user_cond->admin_level = str_2_acct_admin_level(argv[i]+end); u_set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(user_cond->assoc_cond->cluster_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "DefaultAccount", 1) == 0) { - addto_char_list(user_cond->def_acct_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + if(!user_cond->assoc_cond->cluster_list) { + user_cond->assoc_cond->cluster_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->cluster_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "DefaultAccount", 1)) { + if(!user_cond->def_acct_list) { + user_cond->def_acct_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list(user_cond->def_acct_list, + argv[i]+end)) + u_set = 1; + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0 - || strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(user_cond->user_list, argv[i]+end); - addto_char_list(user_cond->assoc_cond->user_list, - argv[i]+end); - u_set = 1; - } else if (strncasecmp (argv[i], "Partition", 3) == 0) { - addto_char_list(user_cond->assoc_cond->partition_list, - argv[i]+end); - a_set = 1; - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - user_cond->qos = str_2_acct_qos(argv[i]+end); + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Partition", 3)) { + if(!user_cond->assoc_cond->partition_list) { + user_cond->assoc_cond->partition_list = + list_create(slurm_destroy_char); + } + if(slurm_addto_char_list( + user_cond->assoc_cond->partition_list, + argv[i]+end)) + a_set = 1; + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!user_cond->qos_list) { + user_cond->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + addto_qos_char_list(user_cond->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown condition: %s\n" - " Use keyword 'set' to modify value\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" + " Use keyword 'set' to modify value\n", + argv[i]); } } + + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(a_set) { @@ -118,51 
+171,54 @@ static int _set_rec(int *start, int argc, char *argv[], int u_set = 0; int a_set = 0; int end = 0; + List qos_list = NULL; for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i--; break; } else if(!end && !strncasecmp(argv[i], "set", 3)) { continue; } else if(!end) { - printf(" Bad format on %s: End your option with " - "an '=' sign\n", argv[i]); - } else if (strncasecmp (argv[i], "AdminLevel", 2) == 0) { + exit_code=1; + fprintf(stderr, + " Bad format on %s: End your option with " + "an '=' sign\n", argv[i]); + } else if (!strncasecmp (argv[i], "AdminLevel", 2)) { user->admin_level = str_2_acct_admin_level(argv[i]+end); u_set = 1; - } else if (strncasecmp (argv[i], "DefaultAccount", 1) == 0) { + } else if (!strncasecmp (argv[i], "DefaultAccount", 1)) { user->default_acct = strip_quotes(argv[i]+end, NULL); u_set = 1; - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if(!association) continue; if (get_uint(argv[i]+end, &association->fairshare, - "FairShare") == SLURM_SUCCESS) + "FairShare") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSec", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSec", 4)) { if(!association) continue; if (get_uint(argv[i]+end, - &association->max_cpu_secs_per_job, - "MaxCPUSec") == SLURM_SUCCESS) + &association->max_cpu_secs_per_job, + "MaxCPUSec") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if(!association) continue; if (get_uint(argv[i]+end, &association->max_jobs, "MaxJobs") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if(!association) continue; if (get_uint(argv[i]+end, &association->max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) a_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { if(!association) continue; mins = time_str2mins(argv[i]+end); @@ -171,18 +227,41 @@ static int _set_rec(int *start, int argc, char *argv[], = (uint32_t) mins; a_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - user->qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!user->qos_list) { + user->qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + if(end > 2 && argv[i][end-1] == '=' + && (argv[i][end-2] == '+' + || argv[i][end-2] == '-')) + option = (int)argv[i][end-2]; + + addto_qos_char_list(user->qos_list, qos_list, + argv[i]+end, option); u_set = 1; } else { - printf(" Unknown option: %s\n" - " Use keyword 'where' to modify condition\n", - argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n" + " Use keyword 'where' to modify condition\n", + argv[i]); } } + if(qos_list) + list_destroy(qos_list); + (*start) = i; if(u_set && a_set) @@ -207,7 +286,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) char *default_acct = NULL; acct_association_cond_t *assoc_cond = NULL; acct_association_cond_t query_assoc_cond; - acct_qos_level_t qos = ACCT_QOS_NOTSET; + List add_qos_list = NULL; + List qos_list = NULL; 
acct_admin_level_t admin_level = ACCT_ADMIN_NOTSET; char *name = NULL, *account = NULL, *cluster = NULL, *partition = NULL; int partition_set = 0; @@ -242,73 +322,96 @@ extern int sacctmgr_add_user(int argc, char *argv[]) for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); if(!end) { - addto_char_list(assoc_cond->user_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(assoc_cond->acct_list, + slurm_addto_char_list(assoc_cond->user_list, + argv[i]+end); + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + slurm_addto_char_list(assoc_cond->acct_list, argv[i]+end); - } else if (strncasecmp (argv[i], "AdminLevel", 2) == 0) { + } else if (!strncasecmp (argv[i], "AdminLevel", 2)) { admin_level = str_2_acct_admin_level(argv[i]+end); - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(assoc_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(assoc_cond->cluster_list, argv[i]+end); - } else if (strncasecmp (argv[i], "DefaultAccount", 1) == 0) { + } else if (!strncasecmp (argv[i], "DefaultAccount", 1)) { default_acct = strip_quotes(argv[i]+end, NULL); - addto_char_list(assoc_cond->acct_list, + slurm_addto_char_list(assoc_cond->acct_list, default_acct); - } else if (strncasecmp (argv[i], "FairShare", 1) == 0) { + } else if (!strncasecmp (argv[i], "FairShare", 1)) { if (get_uint(argv[i]+end, &fairshare, "FairShare") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxCPUSecs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxCPUSecs", 4)) { if (get_uint(argv[i]+end, &max_cpu_secs_per_job, "MaxCPUSecs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxJobs", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxJobs", 4)) { if (get_uint(argv[i]+end, &max_jobs, "MaxJobs") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxNodes", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxNodes", 4)) { if (get_uint(argv[i]+end, &max_nodes_per_job, "MaxNodes") == SLURM_SUCCESS) limit_set = 1; - } else if (strncasecmp (argv[i], "MaxWall", 4) == 0) { + } else if (!strncasecmp (argv[i], "MaxWall", 4)) { mins = time_str2mins(argv[i]+end); if (mins != NO_VAL) { max_wall_duration_per_job = (uint32_t) mins; limit_set = 1; } else { - printf(" Bad MaxWall time format: %s\n", + exit_code=1; + fprintf(stderr, + " Bad MaxWall time format: %s\n", argv[i]); } - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(assoc_cond->user_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Partitions", 1) == 0) { - addto_char_list(assoc_cond->partition_list, + } else if (!strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(assoc_cond->user_list, + argv[i]+end); + } else if (!strncasecmp (argv[i], "Partitions", 1)) { + slurm_addto_char_list(assoc_cond->partition_list, argv[i]+end); - } else if (strncasecmp (argv[i], "QosLevel", 1) == 0) { - qos = str_2_acct_qos(argv[i]+end); + } else if (!strncasecmp (argv[i], "QosLevel", 1)) { + int option = 0; + if(!add_qos_list) { + add_qos_list = + list_create(slurm_destroy_char); + } + + if(!qos_list) { + qos_list = acct_storage_g_get_qos( + db_conn, NULL); + } + + addto_qos_char_list(add_qos_list, qos_list, + argv[i]+end, option); } else { - printf(" Unknown option: %s\n", argv[i]); + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); } } - if(!list_count(assoc_cond->user_list)) { + if(exit_code) { destroy_acct_association_cond(assoc_cond); - printf(" Need name of 
user to add.\n"); + return SLURM_ERROR; + } else if(!list_count(assoc_cond->user_list)) { + destroy_acct_association_cond(assoc_cond); + exit_code=1; + fprintf(stderr, " Need name of user to add.\n"); return SLURM_ERROR; } else { acct_user_cond_t user_cond; memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.user_list = assoc_cond->user_list; - + user_cond.assoc_cond = assoc_cond; + local_user_list = acct_storage_g_get_users( db_conn, &user_cond); } + if(!local_user_list) { - printf(" Problem getting users from database. " - "Contact your admin.\n"); + exit_code=1; + fprintf(stderr, " Problem getting users from database. " + "Contact your admin.\n"); destroy_acct_association_cond(assoc_cond); return SLURM_ERROR; } @@ -316,13 +419,14 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(!list_count(assoc_cond->acct_list)) { destroy_acct_association_cond(assoc_cond); - printf(" Need name of acct to add user to.\n"); + exit_code=1; + fprintf(stderr, " Need name of acct to add user to.\n"); return SLURM_ERROR; } else { acct_account_cond_t account_cond; memset(&account_cond, 0, sizeof(acct_account_cond_t)); - account_cond.acct_list = assoc_cond->acct_list; + account_cond.assoc_cond = assoc_cond; local_acct_list = acct_storage_g_get_accounts( db_conn, &account_cond); @@ -330,7 +434,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } if(!local_acct_list) { - printf(" Problem getting accounts from database. " + exit_code=1; + fprintf(stderr, " Problem getting accounts from database. " "Contact your admin.\n"); list_destroy(local_user_list); destroy_acct_association_cond(assoc_cond); @@ -344,7 +449,9 @@ extern int sacctmgr_add_user(int argc, char *argv[]) cluster_list = acct_storage_g_get_clusters(db_conn, NULL); if(!cluster_list) { - printf(" Problem getting clusters from database. " + exit_code=1; + fprintf(stderr, + " Problem getting clusters from database. 
" "Contact your admin.\n"); destroy_acct_association_cond(assoc_cond); list_destroy(local_user_list); @@ -360,8 +467,10 @@ extern int sacctmgr_add_user(int argc, char *argv[]) list_iterator_destroy(itr_c); if(!list_count(assoc_cond->cluster_list)) { - printf(" Can't add users, no cluster defined yet.\n" - " Please contact your administrator.\n"); + exit_code=1; + fprintf(stderr, + " Can't add users, no cluster defined yet.\n" + " Please contact your administrator.\n"); destroy_acct_association_cond(assoc_cond); list_destroy(local_user_list); list_destroy(local_acct_list); @@ -391,7 +500,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) user = NULL; if(!sacctmgr_find_user_from_list(local_user_list, name)) { if(!default_acct) { - printf(" Need a default account for " + exit_code=1; + fprintf(stderr, " Need a default account for " "these users to add.\n"); rc = SLURM_ERROR; goto no_default; @@ -399,10 +509,11 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(first) { if(!sacctmgr_find_account_from_list( local_acct_list, default_acct)) { - printf(" error: This account '%s' " - "doesn't exist.\n" - " Contact your admin " - "to add this account.\n", + exit_code=1; + fprintf(stderr, " This account '%s' " + "doesn't exist.\n" + " Contact your admin " + "to add this account.\n", default_acct); continue; } @@ -412,7 +523,20 @@ extern int sacctmgr_add_user(int argc, char *argv[]) user->assoc_list = list_create(NULL); user->name = xstrdup(name); user->default_acct = xstrdup(default_acct); - user->qos = qos; + + if(add_qos_list && list_count(add_qos_list)) { + char *tmp_qos = NULL; + ListIterator qos_itr = + list_iterator_create(add_qos_list); + user->qos_list = + list_create(slurm_destroy_char); + while((tmp_qos = list_next(qos_itr))) { + list_append(user->qos_list, + xstrdup(tmp_qos)); + } + list_iterator_destroy(qos_itr); + } + user->admin_level = admin_level; xstrfmtcat(user_str, " %s\n", name); @@ -424,10 +548,11 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(acct_first) { if(!sacctmgr_find_account_from_list( local_acct_list, default_acct)) { - printf(" error: This account '%s' " - "doesn't exist.\n" - " Contact your admin " - "to add this account.\n", + exit_code=1; + fprintf(stderr, " This account '%s' " + "doesn't exist.\n" + " Contact your admin " + "to add this account.\n", account); continue; } @@ -437,15 +562,17 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(!sacctmgr_find_account_base_assoc_from_list( local_assoc_list, account, cluster)) { - if(acct_first) - printf(" error: This " - "account '%s' " - "doesn't exist on " - "cluster %s\n" - " Contact your " - "admin " - "to add this account.\n", - account, cluster); + if(acct_first) { + exit_code=1; + fprintf(stderr, " This " + "account '%s' " + "doesn't exist on " + "cluster %s\n" + " Contact your " + "admin to add " + "this account.\n", + account, cluster); + } continue; } @@ -533,7 +660,8 @@ no_default: printf(" Nothing new added.\n"); goto end_it; } else if(!assoc_str) { - printf(" Error: no associations created.\n"); + exit_code=1; + fprintf(stderr, " No associations created.\n"); goto end_it; } @@ -541,8 +669,14 @@ no_default: printf(" Adding User(s)\n%s", user_str); printf(" Settings =\n"); printf(" Default Account = %s\n", default_acct); - if(qos != ACCT_QOS_NOTSET) - printf(" Qos = %s\n", acct_qos_str(qos)); + if(add_qos_list) { + char *temp_char = get_qos_complete_str( + qos_list, add_qos_list); + if(temp_char) { + printf(" Qos = %s\n", temp_char); + xfree(temp_char); + } + } if(admin_level 
!= ACCT_ADMIN_NOTSET) printf(" Admin Level = %s\n", @@ -598,7 +732,8 @@ no_default: rc = acct_storage_g_add_associations(db_conn, my_uid, assoc_list); } else { - printf(" error: Problem adding users\n"); + exit_code=1; + fprintf(stderr, " Problem adding users\n"); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; @@ -613,11 +748,14 @@ no_default: acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: Problem adding user associations\n"); + exit_code=1; + fprintf(stderr, " Problem adding user associations\n"); rc = SLURM_ERROR; } end_it: + if(add_qos_list) + list_destroy(add_qos_list); list_destroy(user_list); list_destroy(assoc_list); xfree(default_acct); @@ -636,32 +774,22 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) char *acct_str = NULL; ListIterator itr = NULL; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->fairshare = NO_VAL; - user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - user_cond->assoc_cond->max_jobs = NO_VAL; - user_cond->assoc_cond->max_nodes_per_job = NO_VAL; - user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; - for (i=0; i<argc; i++) { cond_set = _set_cond(&i, argc, argv, user_cond, NULL); } - if(!cond_set) { - printf(" You need to specify a user list " - "and account list here.\n"); + if(exit_code) { + destroy_acct_user_cond(user_cond); + return SLURM_ERROR; + } else if(!cond_set) { + exit_code=1; + fprintf(stderr, " You need to specify conditions " + "to add the coordinator.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } - itr = list_iterator_create(user_cond->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((name = list_next(itr))) { xstrfmtcat(user_str, " %s\n", name); @@ -669,8 +797,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) list_iterator_destroy(itr); if(!user_str) { - printf(" You need to specify a user list " - "and account list here.\n"); + exit_code=1; + fprintf(stderr, " You need to specify a user list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } @@ -681,8 +809,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) } list_iterator_destroy(itr); if(!acct_str) { - printf(" You need to specify a user list " - "and account list here.\n"); + exit_code=1; + fprintf(stderr, " You need to specify an account list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } @@ -705,7 +833,8 @@ extern int sacctmgr_add_coord(int argc, char *argv[]) acct_storage_g_commit(db_conn, 0); } } else { - printf(" error: Problem adding coordinator\n"); + exit_code=1; + fprintf(stderr, " Problem adding coordinator\n"); rc = SLURM_ERROR; } @@ -723,6 +852,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) acct_user_rec_t *user = NULL; acct_association_rec_t *assoc = NULL; char *object; + List qos_list = NULL; print_field_t *field = NULL; @@ -742,7 +872,6 @@ extern int sacctmgr_list_user(int argc, char *argv[]) PRINT_MAXN, PRINT_MAXW, PRINT_QOS, - PRINT_QOS_GOLD, PRINT_QOS_RAW, PRINT_PID, PRINT_PNAME, @@ -750,34 +879,23 @@ extern int sacctmgr_list_user(int argc, char *argv[]) PRINT_USER }; - 
user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); user_cond->with_assocs = with_assoc_flag; - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, user_cond, format_list); + if(exit_code) { + destroy_acct_user_cond(user_cond); + list_destroy(format_list); + return SLURM_ERROR; + } + if(!list_count(format_list)) { - addto_char_list(format_list, "U,D,Q,Ad"); + slurm_addto_char_list(format_list, "U,D,Q,Ad"); if(user_cond->with_assocs) - addto_char_list(format_list, + slurm_addto_char_list(format_list, "Cl,Ac,Part,F,MaxC,MaxJ,MaxN,MaxW"); if(user_cond->with_coords) - addto_char_list(format_list, "Coord"); - } - - user_list = acct_storage_g_get_users(db_conn, user_cond); - destroy_acct_user_cond(user_cond); - - if(!user_list) { - printf(" Problem with query.\n"); - list_destroy(format_list); - return SLURM_ERROR; + slurm_addto_char_list(format_list, "Coord"); } print_fields_list = list_create(destroy_print_field); @@ -840,21 +958,16 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->name = xstrdup("MaxWall"); field->len = 11; field->print_routine = print_fields_time; - } else if(!strncasecmp("QOSGOLD", object, 4)) { - field->type = PRINT_QOS_GOLD; - field->name = xstrdup("QOS_GOLD"); - field->len = 7; - field->print_routine = print_fields_uint; } else if(!strncasecmp("QOSRAW", object, 4)) { field->type = PRINT_QOS_RAW; field->name = xstrdup("QOS_RAW"); - field->len = 7; - field->print_routine = print_fields_uint; + field->len = 10; + field->print_routine = print_fields_char_list; } else if(!strncasecmp("QOS", object, 1)) { field->type = PRINT_QOS; field->name = xstrdup("QOS"); - field->len = 9; - field->print_routine = print_fields_str; + field->len = 20; + field->print_routine = sacctmgr_print_qos_list; } else if(!strncasecmp("ParentID", object, 7)) { field->type = PRINT_PID; field->name = xstrdup("Par ID"); @@ -871,7 +984,8 @@ extern int sacctmgr_list_user(int argc, char *argv[]) field->len = 10; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, "Unknown field '%s'\n", object); xfree(field); continue; } @@ -880,6 +994,22 @@ extern int sacctmgr_list_user(int argc, char *argv[]) list_iterator_destroy(itr); list_destroy(format_list); + if(exit_code) { + destroy_acct_user_cond(user_cond); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + + user_list = acct_storage_g_get_users(db_conn, user_cond); + destroy_acct_user_cond(user_cond); + + if(!user_list) { + exit_code=1; + fprintf(stderr, " Problem with query.\n"); + list_destroy(print_fields_list); + return SLURM_ERROR; + } + itr = list_iterator_create(user_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); @@ -894,13 +1024,11 @@ extern int sacctmgr_list_user(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->acct); break; case PRINT_ADMIN: field->print_routine( - SLURM_PRINT_VALUE, field, acct_admin_level_str( user-> @@ -908,101 +1036,93 @@ extern int sacctmgr_list_user(int argc, char *argv[]) break; case PRINT_CLUSTER: field->print_routine( - 
SLURM_PRINT_VALUE, field, assoc->cluster); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, user->coord_accts); break; case PRINT_DACCT: field->print_routine( - SLURM_PRINT_VALUE, field, user->default_acct); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->fairshare); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->id); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_cpu_secs_per_job); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->max_jobs); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_nodes_per_job); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, assoc-> max_wall_duration_per_job); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, - field, - acct_qos_str( - user->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos-1); + qos_list, + user->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos); + qos_list, + user->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_id); break; case PRINT_PNAME: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->parent_acct); break; case PRINT_PART: field->print_routine( - SLURM_PRINT_VALUE, field, assoc->partition); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, user->name); break; @@ -1019,89 +1139,95 @@ extern int sacctmgr_list_user(int argc, char *argv[]) switch(field->type) { case PRINT_ACCOUNT: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_ADMIN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, acct_admin_level_str( user->admin_level)); break; case PRINT_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_COORDS: field->print_routine( - SLURM_PRINT_VALUE, field, user->coord_accts); break; case PRINT_DACCT: field->print_routine( - SLURM_PRINT_VALUE, field, + field, user->default_acct); break; case PRINT_FAIRSHARE: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_ID: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXC: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXJ: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXN: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_MAXW: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_QOS: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - acct_qos_str(user->qos)); - break; - case PRINT_QOS_GOLD: - field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos-1); + field, qos_list, + user->qos_list); break; case PRINT_QOS_RAW: + if(!qos_list) { + qos_list = + acct_storage_g_get_qos( + db_conn, + NULL); + } field->print_routine( - SLURM_PRINT_VALUE, field, - user->qos); + field, qos_list, + user->qos_list); break; case PRINT_PID: field->print_routine( - SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_PART: field->print_routine( - 
SLURM_PRINT_VALUE, field, + field, NULL); break; case PRINT_USER: field->print_routine( - SLURM_PRINT_VALUE, field, + field, user->name); break; default: @@ -1131,20 +1257,6 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) int cond_set = 0, rec_set = 0, set = 0; List ret_list = NULL; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->fairshare = NO_VAL; - user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - user_cond->assoc_cond->max_jobs = NO_VAL; - user_cond->assoc_cond->max_nodes_per_job = NO_VAL; - user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; - assoc->fairshare = NO_VAL; assoc->max_cpu_secs_per_job = NO_VAL; assoc->max_jobs = NO_VAL; @@ -1152,11 +1264,11 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) assoc->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { - if (strncasecmp (argv[i], "Where", 5) == 0) { + if (!strncasecmp (argv[i], "Where", 5)) { i++; cond_set = _set_cond(&i, argc, argv, user_cond, NULL); - } else if (strncasecmp (argv[i], "Set", 3) == 0) { + } else if (!strncasecmp (argv[i], "Set", 3)) { i++; rec_set = _set_rec(&i, argc, argv, user, assoc); } else { @@ -1164,8 +1276,14 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) } } - if(!rec_set) { - printf(" You didn't give me anything to set\n"); + if(exit_code) { + destroy_acct_user_cond(user_cond); + destroy_acct_user_rec(user); + destroy_acct_association_rec(assoc); + return SLURM_ERROR; + } else if(!rec_set) { + exit_code=1; + fprintf(stderr, " You didn't give me anything to set\n"); destroy_acct_user_cond(user_cond); destroy_acct_user_rec(user); destroy_acct_association_rec(assoc); @@ -1197,8 +1315,10 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) list_transfer(user_cond->def_acct_list, user_cond->assoc_cond->acct_list); } else { - printf(" There was a problem with your " - "'where' options.\n"); + exit_code=1; + fprintf(stderr, + " There was a problem with your " + "'where' options.\n"); goto assoc_start; } } @@ -1216,7 +1336,8 @@ extern int sacctmgr_modify_user(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1241,7 +1362,8 @@ assoc_start: } else if(ret_list) { printf(" Nothing modified\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1274,21 +1396,19 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) List ret_list = NULL; int set = 0; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - if(!(set = _set_cond(&i, argc, argv, user_cond, NULL))) { - 
printf(" No conditions given to remove, not executing.\n"); + exit_code=1; + fprintf(stderr, + " No conditions given to remove, not executing.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } + if(exit_code) { + destroy_acct_user_cond(user_cond); + return SLURM_ERROR; + } + notice_thread_init(); if(set == 1) { ret_list = acct_storage_g_remove_users( @@ -1322,7 +1442,8 @@ extern int sacctmgr_delete_user(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing deleted\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } @@ -1345,32 +1466,23 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) ListIterator itr = NULL; List ret_list = NULL; - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->def_acct_list = list_create(slurm_destroy_char); - - user_cond->assoc_cond = xmalloc(sizeof(acct_association_cond_t)); - user_cond->assoc_cond->user_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->acct_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->cluster_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->partition_list = list_create(slurm_destroy_char); - user_cond->assoc_cond->fairshare = NO_VAL; - user_cond->assoc_cond->max_cpu_secs_per_job = NO_VAL; - user_cond->assoc_cond->max_jobs = NO_VAL; - user_cond->assoc_cond->max_nodes_per_job = NO_VAL; - user_cond->assoc_cond->max_wall_duration_per_job = NO_VAL; for (i=0; i<argc; i++) { cond_set = _set_cond(&i, argc, argv, user_cond, NULL); } - if(!cond_set) { - printf(" You need to specify a user list " + if(exit_code) { + destroy_acct_user_cond(user_cond); + return SLURM_ERROR; + } else if(!cond_set) { + exit_code=1; + fprintf(stderr, " You need to specify a user list " "or account list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; } - itr = list_iterator_create(user_cond->user_list); + itr = list_iterator_create(user_cond->assoc_cond->user_list); while((name = list_next(itr))) { xstrfmtcat(user_str, " %s\n", name); @@ -1384,7 +1496,8 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) } list_iterator_destroy(itr); if(!user_str && !acct_str) { - printf(" You need to specify a user list " + exit_code=1; + fprintf(stderr, " You need to specify a user list " "or an account list here.\n"); destroy_acct_user_cond(user_cond); return SLURM_ERROR; @@ -1421,7 +1534,8 @@ extern int sacctmgr_delete_coord(int argc, char *argv[]) } else if(ret_list) { printf(" Nothing removed\n"); } else { - printf(" Error with request\n"); + exit_code=1; + fprintf(stderr, " Error with request\n"); rc = SLURM_ERROR; } diff --git a/src/salloc/Makefile.am b/src/salloc/Makefile.am index a27f098f63607db4138573666e471be22641aa8b..e2da3019f5c141fa9098908cef5e114cc2436cdf 100644 --- a/src/salloc/Makefile.am +++ b/src/salloc/Makefile.am @@ -1,6 +1,7 @@ # AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -8,7 +9,7 @@ bin_PROGRAMS = salloc salloc_SOURCES = salloc.c salloc.h opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl salloc_LDADD = \ $(convenience_libs) diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in index 1c34e396f741c1926c094c248169b1f903098101..91ae04a4039fdbde336afa111ed1f8fa8b1eb53c 100644 --- a/src/salloc/Makefile.in +++ b/src/salloc/Makefile.in @@ -72,7 +72,8 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_salloc_OBJECTS = salloc.$(OBJEXT) 
opt.$(OBJEXT) salloc_OBJECTS = $(am_salloc_OBJECTS) -salloc_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +salloc_DEPENDENCIES = $(am__DEPENDENCIES_1) salloc_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(salloc_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -261,9 +262,10 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) salloc_SOURCES = salloc.c salloc.h opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl salloc_LDADD = \ $(convenience_libs) @@ -466,6 +468,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9e17c746bd650d9d4f610e30b5851f024561f2f5..a46937536c3dc7c76a419671900db89ccf3500ed 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -125,7 +125,7 @@ #define LONG_OPT_NTASKSPERNODE 0x136 #define LONG_OPT_NTASKSPERSOCKET 0x137 #define LONG_OPT_NTASKSPERCORE 0x138 -#define LONG_OPT_TASK_MEM 0x13a +#define LONG_OPT_MEM_PER_CPU 0x13a #define LONG_OPT_HINT 0x13b #define LONG_OPT_ACCTG_FREQ 0x13c @@ -267,7 +267,7 @@ static void _opt_default() opt.minsockets = -1; opt.mincores = -1; opt.minthreads = -1; - opt.task_mem = -1; + opt.mem_per_cpu = -1; opt.realmem = -1; opt.tmpdisk = -1; @@ -512,8 +512,9 @@ void set_options(const int argc, char **argv) {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"job-mem", required_argument, 0, LONG_OPT_TASK_MEM}, - {"task-mem", required_argument, 0, LONG_OPT_TASK_MEM}, + {"job-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"task-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"hint", required_argument, 0, LONG_OPT_HINT}, {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, @@ -761,9 +762,9 @@ void set_options(const int argc, char **argv) exit(1); } break; - case LONG_OPT_TASK_MEM: - opt.task_mem = (int) str_to_bytes(optarg); - if (opt.task_mem < 0) { + case LONG_OPT_MEM_PER_CPU: + opt.mem_per_cpu = (int) str_to_bytes(optarg); + if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); exit(1); @@ -1015,15 +1016,11 @@ static bool _opt_verify(void) verified = false; } - /* When CR with memory as a CR is enabled we need to assign - * adequate value or check the value to opt.mem */ - if ((opt.realmem >= -1) && (opt.task_mem > 0)) { - if (opt.realmem == -1) { - opt.realmem = opt.task_mem; - } else if (opt.realmem < opt.task_mem) { - info("mem < task-mem - resizing mem to be equal " - "to task-mem"); - opt.realmem = opt.task_mem; + if ((opt.realmem > -1) && (opt.mem_per_cpu > -1)) { + if (opt.realmem < opt.mem_per_cpu) { + info("mem < mem-per-cpu - resizing mem to be equal " + "to mem-per-cpu"); + opt.realmem = opt.mem_per_cpu; } } @@ -1173,8 +1170,8 @@ static char *print_constraints() if (opt.realmem > 0) xstrfmtcat(buf, "mem=%dM ", opt.realmem); - if (opt.task_mem > 0) - xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem); + if (opt.mem_per_cpu > 0) + xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); 
if (opt.tmpdisk > 0) xstrfmtcat(buf, "tmp=%ld ", opt.tmpdisk); @@ -1353,7 +1350,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--bell] [--no-bell] [--kill-command[=signal]]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" -" [--network=type]\n" +" [--network=type] [--mem-per-cpu=MB]\n" " executable [args...]\n"); } @@ -1416,8 +1413,8 @@ static void _help(void) "Consumable resources related options:\n" " --exclusive allocate nodes in exclusive mode when\n" " cpu consumable resource is enabled\n" -" --task-mem=MB maximum amount of real memory per task\n" -" required by the job.\n" +" --mem-per-cpu=MB maximum amount of real memory per allocated\n" +" cpu required by the job.\n" " --mem >= --job-mem if --mem is specified.\n" "\n" "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 972444517d95c9382d75ee8cdd176ef8cfb6a8fb..2ca869cc76af50fdb7291e96888da3ee8141e74b 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -107,7 +107,7 @@ typedef struct salloc_options { int minsockets; /* --minsockets=n */ int mincores; /* --mincores=n */ int minthreads; /* --minthreads=n */ - int task_mem; /* --task-mem=n */ + int mem_per_cpu; /* --mem_per_cpu=n */ int realmem; /* --mem=n */ long tmpdisk; /* --tmp=n */ char *constraints; /* --constraints=, -C constraint*/ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index edb65cb94d70fce8145eaffe3367791a47129b67..79c5616b3c2a02b988f90101d0e600dd75ee208b 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -217,10 +217,6 @@ int main(int argc, char *argv[]) env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d", opt.acctg_freq); } - if (opt.task_mem >= 0) { - env_array_append_fmt(&env, "SLURM_TASK_MEM", "%d", - opt.task_mem); - } if (opt.network) env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network); env_array_set_environment(env); @@ -360,6 +356,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->job_min_threads = opt.minthreads; if (opt.realmem > -1) desc->job_min_memory = opt.realmem; + else if (opt.mem_per_cpu > -1) + desc->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU; if (opt.tmpdisk > -1) desc->job_min_tmp_disk = opt.tmpdisk; if (opt.overcommit) { diff --git a/src/sattach/Makefile.am b/src/sattach/Makefile.am index e38c68075d251b6d2b971ddfa205724f1c663bed..749a846d1f84a22d96f8c6d7f7e6f70bb3a73c6c 100644 --- a/src/sattach/Makefile.am +++ b/src/sattach/Makefile.am @@ -12,7 +12,7 @@ sattach_SOURCES = \ sattach.c \ sattach.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sattach_LDADD = \ $(convenience_libs) diff --git a/src/sattach/Makefile.in b/src/sattach/Makefile.in index 45cc34b83df72cbf94f7b64baad4e703f8820a48..14d5c2fd712849f0c0615e1ca27c276d9ab880cb 100644 --- a/src/sattach/Makefile.in +++ b/src/sattach/Makefile.in @@ -73,7 +73,8 @@ PROGRAMS = $(bin_PROGRAMS) am_sattach_OBJECTS = attach.$(OBJEXT) opt.$(OBJEXT) sattach.$(OBJEXT) \ sattach.wrapper.$(OBJEXT) sattach_OBJECTS = $(am_sattach_OBJECTS) -sattach_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +sattach_DEPENDENCIES = $(am__DEPENDENCIES_1) sattach_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sattach_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -269,7 +270,7 @@ sattach_SOURCES = \ sattach.c \ sattach.wrapper.c -convenience_libs = 
$(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sattach_LDADD = \ $(convenience_libs) diff --git a/src/sbatch/Makefile.am b/src/sbatch/Makefile.am index de4ffb6aaabc6288ff314317d396c6cf713e1f36..d9b382f1bad2e1217f14f3301ad01b358847f176 100644 --- a/src/sbatch/Makefile.am +++ b/src/sbatch/Makefile.am @@ -1,6 +1,7 @@ # AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -8,7 +9,7 @@ bin_PROGRAMS = sbatch sbatch_SOURCES = sbatch.c opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sbatch_LDADD = $(convenience_libs) diff --git a/src/sbatch/Makefile.in b/src/sbatch/Makefile.in index 7cd63bdf96e48c6720c217cb9edce25bd2d3862b..edd178e682af6e40da98c6102a4ccd80646a0ace 100644 --- a/src/sbatch/Makefile.in +++ b/src/sbatch/Makefile.in @@ -72,7 +72,8 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_sbatch_OBJECTS = sbatch.$(OBJEXT) opt.$(OBJEXT) sbatch_OBJECTS = $(am_sbatch_OBJECTS) -sbatch_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +sbatch_DEPENDENCIES = $(am__DEPENDENCIES_1) sbatch_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sbatch_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -261,9 +262,10 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) sbatch_SOURCES = sbatch.c opt.c opt.h -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl sbatch_LDADD = $(convenience_libs) sbatch_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) all: all-am @@ -464,6 +466,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 365782b68d917a3c395e87e1549c1ee79d4d9a49..263b70c204ac4b640d48e1c5f6b8ff135f2664ad 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -122,7 +122,7 @@ #define LONG_OPT_NTASKSPERNODE 0x136 #define LONG_OPT_NTASKSPERSOCKET 0x137 #define LONG_OPT_NTASKSPERCORE 0x138 -#define LONG_OPT_TASK_MEM 0x13a +#define LONG_OPT_MEM_PER_CPU 0x13a #define LONG_OPT_HINT 0x13b #define LONG_OPT_BLRTS_IMAGE 0x140 #define LONG_OPT_LINUX_IMAGE 0x141 @@ -269,7 +269,7 @@ static void _opt_default() opt.minsockets = -1; opt.mincores = -1; opt.minthreads = -1; - opt.task_mem = -1; + opt.mem_per_cpu = -1; opt.realmem = -1; opt.tmpdisk = -1; @@ -523,8 +523,9 @@ static struct option long_options[] = { {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"job-mem", required_argument, 0, LONG_OPT_TASK_MEM}, - {"task-mem", required_argument, 0, LONG_OPT_TASK_MEM}, + {"job-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"task-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"hint", required_argument, 0, LONG_OPT_HINT}, {"tmp", required_argument, 0, LONG_OPT_TMP}, {"jobid", required_argument, 0, LONG_OPT_JOBID}, @@ -1150,14 +1151,13 @@ static void _set_options(int argc, char **argv) exit(1); } break; - case LONG_OPT_TASK_MEM: - opt.task_mem = (int) str_to_bytes(optarg); - if (opt.task_mem < 0) { + case 
LONG_OPT_MEM_PER_CPU: + opt.mem_per_cpu = (int) str_to_bytes(optarg); + if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); exit(1); } - setenvf(NULL, "SLURM_TASK_MEM", "%d", opt.task_mem); break; case LONG_OPT_TMP: opt.tmpdisk = str_to_bytes(optarg); @@ -1773,15 +1773,11 @@ static bool _opt_verify(void) verified = false; } - /* When CR with memory as a CR is enabled we need to assign - * adequate value or check the value to opt.mem */ - if ((opt.realmem >= -1) && (opt.task_mem > 0)) { - if (opt.realmem == -1) { - opt.realmem = opt.task_mem; - } else if (opt.realmem < opt.task_mem) { - info("mem < task-mem - resizing mem to be equal " - "to task-mem"); - opt.realmem = opt.task_mem; + if ((opt.realmem > -1) && (opt.mem_per_cpu > -1)) { + if (opt.realmem < opt.mem_per_cpu) { + info("mem < mem-per-cpu - resizing mem to be equal " + "to mem-per-cpu"); + opt.realmem = opt.mem_per_cpu; } } @@ -1979,8 +1975,8 @@ static char *print_constraints() if (opt.realmem > 0) xstrfmtcat(buf, "mem=%dM ", opt.realmem); - if (opt.task_mem > 0) - xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem); + if (opt.mem_per_cpu > 0) + xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); if (opt.tmpdisk > 0) xstrfmtcat(buf, "tmp=%ld ", opt.tmpdisk); @@ -2154,7 +2150,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--requeue] [--no-requeue] [--ntasks-per-node=n] [--propagate]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" -" [--network=type]\n" +" [--network=type] [--mem-per-cpu=MB]\n" " executable [args...]\n"); } @@ -2219,9 +2215,8 @@ static void _help(void) "Consumable resources related options:\n" " --exclusive allocate nodes in exclusive mode when\n" " cpu consumable resource is enabled\n" -" --task-mem=MB maximum amount of real memory per task\n" -" required by the job.\n" -" --mem >= --job-mem if --mem is specified.\n" +" --mem-per-cpu=MB maximum amount of real memory per CPU\n" +" allocated to the job.\n" "\n" "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" " -B --extra-node-info=S[:C[:T]] Expands to:\n" diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index b746cedf2385a2bf88724ff829572ded9c1c7175..fe53e95a1e6bb531b6bf0be8a20ec2050b269927 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing - * $Id: opt.h 14338 2008-06-24 23:10:32Z jette $ + * $Id: opt.h 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -113,7 +113,7 @@ typedef struct sbatch_options { int minsockets; /* --minsockets=n */ int mincores; /* --mincores=n */ int minthreads; /* --minthreads=n */ - int task_mem; /* --task-mem=n */ + int mem_per_cpu; /* --mem-per-cpu=n */ int realmem; /* --mem=n */ long tmpdisk; /* --tmp=n */ char *constraints; /* --constraints=, -C constraint*/ diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index b94dc55428e3f943d2f18eaa3deb80177e2fc870..5cda2761f2450a4ba038c7f98f67400b6526e14e 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * sbatch.c - Submit a SLURM batch script. 
* - * $Id: sbatch.c 14420 2008-07-02 19:52:49Z jette $ + * $Id: sbatch.c 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2006-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -227,6 +227,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->job_min_threads = opt.minthreads; if (opt.realmem > -1) desc->job_min_memory = opt.realmem; + else if (opt.mem_per_cpu > -1) + desc->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU; if (opt.tmpdisk > -1) desc->job_min_tmp_disk = opt.tmpdisk; if (opt.overcommit) { diff --git a/src/sbcast/Makefile.am b/src/sbcast/Makefile.am index ce502c7240ab88e2ce0b657ed8ef71be0dbd82e8..bedbd116a9d51aeb4ec6b4724dfa0fc3d8f44701 100644 --- a/src/sbcast/Makefile.am +++ b/src/sbcast/Makefile.am @@ -6,7 +6,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = sbcast -sbcast_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sbcast_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sbcast.h sbcast_SOURCES = agent.c sbcast.c opts.c diff --git a/src/sbcast/Makefile.in b/src/sbcast/Makefile.in index 2dc5fa5ce1a02abaccadef0021eee82890c67cee..a415e04a0d1b798c2eaa03d5f85e86cc300070ad 100644 --- a/src/sbcast/Makefile.in +++ b/src/sbcast/Makefile.in @@ -75,7 +75,7 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_sbcast_OBJECTS = agent.$(OBJEXT) sbcast.$(OBJEXT) opts.$(OBJEXT) sbcast_OBJECTS = $(am_sbcast_OBJECTS) -sbcast_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +sbcast_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sbcast_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sbcast_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -266,7 +266,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) -sbcast_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sbcast_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sbcast.h sbcast_SOURCES = agent.c sbcast.c opts.c sbcast_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/scancel/Makefile.am b/src/scancel/Makefile.am index 135841c4aeb12993d27a38ab518b4f0e515c01d9..95cfb3e6148ff62ae566d5fef431263049f340cb 100644 --- a/src/scancel/Makefile.am +++ b/src/scancel/Makefile.am @@ -6,7 +6,7 @@ INCLUDES = -I$(top_srcdir) bin_PROGRAMS = scancel -scancel_LDADD = $(top_builddir)/src/api/libslurmhelper.la +scancel_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = scancel.h scancel_SOURCES = scancel.c opt.c diff --git a/src/scancel/Makefile.in b/src/scancel/Makefile.in index 66ffc8f2174d730c9f4b422d5314464e6003f12c..635087993ddf076c2383218a6fb13321f9adf1a5 100644 --- a/src/scancel/Makefile.in +++ b/src/scancel/Makefile.in @@ -74,7 +74,7 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_scancel_OBJECTS = scancel.$(OBJEXT) opt.$(OBJEXT) scancel_OBJECTS = $(am_scancel_OBJECTS) -scancel_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +scancel_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o scancel_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(scancel_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -265,7 +265,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) -scancel_LDADD = $(top_builddir)/src/api/libslurmhelper.la 
+scancel_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = scancel.h scancel_SOURCES = scancel.c opt.c scancel_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/scontrol/Makefile.am b/src/scontrol/Makefile.am index c450c3ded1b03bf1f67e586d822817a9a4131d42..31b51399c886401b5ce008d4e8feb827f885418f 100644 --- a/src/scontrol/Makefile.am +++ b/src/scontrol/Makefile.am @@ -16,7 +16,8 @@ scontrol_SOURCES = \ update_node.c \ update_part.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl + scontrol_LDADD = \ $(convenience_libs) \ diff --git a/src/scontrol/Makefile.in b/src/scontrol/Makefile.in index a872200ea574537d09313886b268d21286c70ff3..f546570c6f40cd25a7783b987f7d79fed6d093ad 100644 --- a/src/scontrol/Makefile.in +++ b/src/scontrol/Makefile.in @@ -74,8 +74,9 @@ am_scontrol_OBJECTS = info_job.$(OBJEXT) info_node.$(OBJEXT) \ info_part.$(OBJEXT) scontrol.$(OBJEXT) update_job.$(OBJEXT) \ update_node.$(OBJEXT) update_part.$(OBJEXT) scontrol_OBJECTS = $(am_scontrol_OBJECTS) -am__DEPENDENCIES_1 = -scontrol_DEPENDENCIES = $(convenience_libs) $(am__DEPENDENCIES_1) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +am__DEPENDENCIES_2 = +scontrol_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_2) scontrol_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(scontrol_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -275,7 +276,7 @@ scontrol_SOURCES = \ update_node.c \ update_part.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl scontrol_LDADD = \ $(convenience_libs) \ $(READLINE_LIBS) diff --git a/src/scontrol/info_job.c b/src/scontrol/info_job.c index 488ae8b683fbde6303925b0c2cf3a8378b1c44e4..cb8b944ff7e7eb0254807d8b75615ce2e0d118a0 100644 --- a/src/scontrol/info_job.c +++ b/src/scontrol/info_job.c @@ -2,6 +2,7 @@ * info_job.c - job information functions for scontrol. ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * LLNL-CODE-402394. @@ -42,7 +43,8 @@ #include "src/common/stepd_api.h" static bool _in_node_bit_list(int inx, int *node_list_array); - +static int _scontrol_load_jobs(job_info_msg_t ** job_buffer_pptr, + uint32_t job_id); /* * Determine if a node index is in a node list pair array. 
* RET - true if specified index is in the array @@ -67,8 +69,8 @@ _in_node_bit_list(int inx, int *node_list_array) } /* Load current job table information into *job_buffer_pptr */ -extern int -scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr) +static int +_scontrol_load_jobs(job_info_msg_t ** job_buffer_pptr, uint32_t job_id) { int error_code; static job_info_msg_t *old_job_info_ptr = NULL; @@ -82,8 +84,13 @@ scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr) if (old_job_info_ptr) { if (last_show_flags != show_flags) old_job_info_ptr->last_update = (time_t) 0; - error_code = slurm_load_jobs (old_job_info_ptr->last_update, + if (job_id) { + error_code = slurm_load_job(&job_info_ptr, job_id); + } else { + error_code = slurm_load_jobs( + old_job_info_ptr->last_update, &job_info_ptr, show_flags); + } if (error_code == SLURM_SUCCESS) slurm_free_job_info_msg (old_job_info_ptr); else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) { @@ -92,13 +99,17 @@ scontrol_load_jobs (job_info_msg_t ** job_buffer_pptr) if (quiet_flag == -1) printf ("slurm_load_jobs no change in data\n"); } + } else if (job_id) { + error_code = slurm_load_job(&job_info_ptr, job_id); + } else { + error_code = slurm_load_jobs((time_t) NULL, &job_info_ptr, + show_flags); } - else - error_code = slurm_load_jobs ((time_t) NULL, &job_info_ptr, - show_flags); if (error_code == SLURM_SUCCESS) { old_job_info_ptr = job_info_ptr; + if (job_id) + old_job_info_ptr->last_update = (time_t) 0; last_show_flags = show_flags; *job_buffer_pptr = job_info_ptr; } @@ -154,7 +165,7 @@ scontrol_print_completing (void) node_info_msg_t *node_info_msg; uint16_t show_flags = 0; - error_code = scontrol_load_jobs (&job_info_msg); + error_code = _scontrol_load_jobs (&job_info_msg, 0); if (error_code) { exit_code = 1; if (quiet_flag != 1) @@ -234,7 +245,10 @@ scontrol_print_job (char * job_id_str) job_info_msg_t * job_buffer_ptr = NULL; job_info_t *job_ptr = NULL; - error_code = scontrol_load_jobs(&job_buffer_ptr); + if (job_id_str) + job_id = (uint32_t) strtol (job_id_str, (char **)NULL, 10); + + error_code = _scontrol_load_jobs(&job_buffer_ptr, job_id); if (error_code) { exit_code = 1; if (quiet_flag != 1) @@ -250,17 +264,12 @@ scontrol_print_job (char * job_id_str) time_str, job_buffer_ptr->record_count); } - if (job_id_str) - job_id = (uint32_t) strtol (job_id_str, (char **)NULL, 10); + job_ptr = job_buffer_ptr->job_array ; for (i = 0; i < job_buffer_ptr->record_count; i++) { - if (job_id_str && job_id != job_ptr[i].job_id) - continue; print_cnt++; slurm_print_job_info (stdout, & job_ptr[i], one_liner ) ; - if (job_id_str) - break; } if (print_cnt == 0) { diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index 49ae2df8ab6a1baaf52ce4c9e21d80307275241b..b981d761dfd0bec2cd6f4aa7cd81de2166035499 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -324,12 +324,19 @@ scontrol_update_job (int argc, char *argv[]) (char **) NULL, 10); update_cnt++; } - else if (strncasecmp(argv[i], "MinMemory=", 10) == 0) { + else if (strncasecmp(argv[i], "MinMemoryNode=", 14) == 0) { job_msg.job_min_memory = - (uint32_t) strtol(&argv[i][10], + (uint32_t) strtol(&argv[i][14], (char **) NULL, 10); update_cnt++; } + else if (strncasecmp(argv[i], "MinMemoryCPU=", 13) == 0) { + job_msg.job_min_memory = + (uint32_t) strtol(&argv[i][13], + (char **) NULL, 10); + job_msg.job_min_memory |= MEM_PER_CPU; + update_cnt++; + } else if (strncasecmp(argv[i], "MinTmpDisk=", 11) == 0) { job_msg.job_min_tmp_disk = (uint32_t) strtol(&argv[i][11], 
diff --git a/src/sinfo/Makefile.am b/src/sinfo/Makefile.am index 114d28cc95da93d6b77eaff27b3494a496664913..0820811a3a0d34c294e9fc0bceea8ed589d63b4d 100644 --- a/src/sinfo/Makefile.am +++ b/src/sinfo/Makefile.am @@ -6,7 +6,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = sinfo -sinfo_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sinfo_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sinfo.h print.h sinfo_SOURCES = sinfo.c opts.c print.c sort.c diff --git a/src/sinfo/Makefile.in b/src/sinfo/Makefile.in index 07e64aec76a356c62ddb7740c004546a6f6c4523..75e74ad02e71f70e108d48d8df00f1ce59d8372e 100644 --- a/src/sinfo/Makefile.in +++ b/src/sinfo/Makefile.in @@ -76,7 +76,7 @@ PROGRAMS = $(bin_PROGRAMS) am_sinfo_OBJECTS = sinfo.$(OBJEXT) opts.$(OBJEXT) print.$(OBJEXT) \ sort.$(OBJEXT) sinfo_OBJECTS = $(am_sinfo_OBJECTS) -sinfo_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +sinfo_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sinfo_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sinfo_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -267,7 +267,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) -sinfo_LDADD = $(top_builddir)/src/api/libslurmhelper.la +sinfo_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sinfo.h print.h sinfo_SOURCES = sinfo.c opts.c print.c sort.c sinfo_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 1dada0c7925e8441f4fcbc4f07af32ef1ed7614f..b9ec0a96991660ffa2af4c0f9bdf179559fb1a08 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2,8 +2,6 @@ * job_mgr.c - manage the job information of slurm * Note: there is a global job list (job_list), time stamp * (last_job_update), and hash table (job_hash) - * - * $Id: job_mgr.c 14311 2008-06-23 18:55:55Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -1228,7 +1226,7 @@ void dump_job_desc(job_desc_msg_t * job_specs) long kill_on_node_fail, shared, immediate; long cpus_per_task, requeue, num_tasks, overcommit; long ntasks_per_node, ntasks_per_socket, ntasks_per_core; - char buf[100]; + char *mem_type, buf[100]; if (job_specs == NULL) return; @@ -1262,12 +1260,21 @@ void dump_job_desc(job_desc_msg_t * job_specs) debug3(" job_min_cores=%ld job_min_threads=%ld", job_min_cores, job_min_threads); - job_min_memory = (job_specs->job_min_memory != NO_VAL) ? - (long) job_specs->job_min_memory : -1L; + if (job_specs->job_min_memory == NO_VAL) { + job_min_memory = -1L; + mem_type = "job"; + } else if (job_specs->job_min_memory & MEM_PER_CPU) { + job_min_memory = (long) (job_specs->job_min_memory & + (~MEM_PER_CPU)); + mem_type = "cpu"; + } else { + job_min_memory = (long) job_specs->job_min_memory; + mem_type = "job"; + } job_min_tmp_disk = (job_specs->job_min_tmp_disk != NO_VAL) ? (long) job_specs->job_min_tmp_disk : -1L; - debug3(" job_min_memory=%ld job_min_tmp_disk=%ld", - job_min_memory, job_min_tmp_disk); + debug3(" min_memory_%s=%ld job_min_tmp_disk=%ld", + mem_type, job_min_memory, job_min_tmp_disk); immediate = (job_specs->immediate == 0) ? 
0L : 1L; debug3(" immediate=%ld features=%s", immediate, job_specs->features); @@ -1677,6 +1684,7 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, job_ptr->job_state = JOB_CANCELLED; job_ptr->start_time = now; job_ptr->end_time = now; + srun_allocate_abort(job_ptr); job_completion_logger(job_ptr); delete_job_details(job_ptr); verbose("job_signal of pending job %u successful", job_id); @@ -1994,8 +2002,9 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, accounting_enforce, &assoc_ptr)) { - info("_job_create: invalid account or partition for user %u", - job_desc->user_id); + info("_job_create: invalid account or partition for user %u, " + "account '%s', and partition '%s'", + job_desc->user_id, assoc_rec.acct, assoc_rec.partition); error_code = ESLURM_INVALID_ACCOUNT; return error_code; } @@ -2854,6 +2863,53 @@ static char *_copy_nodelist_no_dup(char *node_list) return xstrdup(buf); } +static bool _valid_job_min_mem(job_desc_msg_t * job_desc_msg) +{ + uint32_t base_size = job_desc_msg->job_min_memory; + uint32_t size_limit = slurmctld_conf.max_mem_per_task; + uint16_t cpus_per_node; + + if (size_limit == 0) + return true; + + if ((base_size & MEM_PER_CPU) && (size_limit & MEM_PER_CPU)) { + base_size &= (~MEM_PER_CPU); + size_limit &= (~MEM_PER_CPU); + if (base_size <= size_limit) + return true; + return false; + } + + if (((base_size & MEM_PER_CPU) == 0) && + ((size_limit & MEM_PER_CPU) == 0)) { + if (base_size <= size_limit) + return true; + return false; + } + + /* Our size is per CPU and limit per node or vice versa. + * CPU count may vary by node, but we don't have a good + * way to identify specific nodes for the job at this + * point, so just pick the first node as a basis for + * enforcing MaxMemPerCPU. 
*/ + if (slurmctld_conf.fast_schedule) + cpus_per_node = node_record_table_ptr[0].config_ptr->cpus; + else + cpus_per_node = node_record_table_ptr[0].cpus; + if (job_desc_msg->num_procs != NO_VAL) + cpus_per_node = MIN(cpus_per_node, job_desc_msg->num_procs); + if (base_size & MEM_PER_CPU) { + base_size &= (~MEM_PER_CPU); + base_size *= cpus_per_node; + } else { + size_limit &= (~MEM_PER_CPU); + size_limit *= cpus_per_node; + } + if (base_size <= size_limit) + return true; + return false; +} + /* * job_time_limit - terminate jobs which have exceeded their time limit * global: job_list - pointer global job list @@ -3010,6 +3066,12 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, job_desc_msg->nice = NICE_OFFSET; } + if (job_desc_msg->job_min_memory == NO_VAL) { + /* Default memory limit is DefMemPerCPU (if set) or no limit */ + job_desc_msg->job_min_memory = slurmctld_conf.def_mem_per_task; + } else if (!_valid_job_min_mem(job_desc_msg)) + return ESLURM_INVALID_TASK_MEMORY; + if (job_desc_msg->min_sockets == (uint16_t) NO_VAL) job_desc_msg->min_sockets = 1; /* default socket count of 1 */ if (job_desc_msg->min_cores == (uint16_t) NO_VAL) @@ -3035,8 +3097,6 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, job_desc_msg->job_min_cores = 1; /* default 1 core per socket */ if (job_desc_msg->job_min_threads == (uint16_t) NO_VAL) job_desc_msg->job_min_threads = 1; /* default 1 thread per core */ - if (job_desc_msg->job_min_memory == NO_VAL) - job_desc_msg->job_min_memory = 0; /* default no memory limit */ if (job_desc_msg->job_min_tmp_disk == NO_VAL) job_desc_msg->job_min_tmp_disk = 0;/* default 0MB disk per node */ @@ -3188,7 +3248,7 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, (job_ptr->part_ptr->hidden)) continue; - if (slurmctld_conf.private_data + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) continue; @@ -3208,6 +3268,53 @@ extern void pack_all_jobs(char **buffer_ptr, int *buffer_size, buffer_ptr[0] = xfer_buf_data(buffer); } +/* + * pack_one_job - dump information for one job in + * machine independent form (for network transmission) + * OUT buffer_ptr - the pointer is set to the allocated buffer. 
+ * OUT buffer_size - set to size of the buffer in bytes + * IN job_id - ID of job that we want info for + * IN uid - uid of user making request (for partition filtering) + * NOTE: the buffer at *buffer_ptr must be xfreed by the caller + * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c + * whenever the data format changes + */ +extern int pack_one_job(char **buffer_ptr, int *buffer_size, + uint32_t job_id, uid_t uid) +{ + ListIterator job_iterator; + struct job_record *job_ptr; + uint32_t jobs_packed = 0; + Buf buffer; + + buffer_ptr[0] = NULL; + *buffer_size = 0; + + job_iterator = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (job_ptr->job_id != job_id) + continue; + + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) + && (job_ptr->user_id != uid) && !validate_super_user(uid)) + break; + + jobs_packed++; + break; + } + list_iterator_destroy(job_iterator); + if (jobs_packed == 0) + return ESLURM_INVALID_JOB_ID; + + buffer = init_buf(BUF_SIZE); + pack32(jobs_packed, buffer); + pack_time(time(NULL), buffer); + pack_job(job_ptr, buffer); + + *buffer_size = get_buf_offset(buffer); + buffer_ptr[0] = xfer_buf_data(buffer); + return SLURM_SUCCESS; +} /* * pack_job - dump all configuration information about a specific job in @@ -3853,12 +3960,16 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (job_specs->job_min_memory != NO_VAL) { if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) error_code = ESLURM_DISABLED; - else if (super_user - || (detail_ptr->job_min_memory - > job_specs->job_min_memory)) { + else if (super_user) { + char *entity; + if (job_specs->job_min_memory & MEM_PER_CPU) + entity = "cpu"; + else + entity = "job"; detail_ptr->job_min_memory = job_specs->job_min_memory; - info("update_job: setting job_min_memory to %u for " - "job_id %u", job_specs->job_min_memory, + info("update_job: setting min_memory_%s to %u for " + "job_id %u", entity, + (job_specs->job_min_memory & (~MEM_PER_CPU)), job_specs->job_id); } else { error("Attempt to increase job_min_memory for job %u", @@ -4590,7 +4701,7 @@ job_alloc_info(uint32_t uid, uint32_t job_id, struct job_record **job_pptr) if ((job_ptr->user_id != uid) && (uid != 0) && (uid != slurmctld_conf.slurm_user_id)) return ESLURM_ACCESS_DENIED; - if (slurmctld_conf.private_data + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) return ESLURM_ACCESS_DENIED; if (IS_JOB_PENDING(job_ptr)) diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 457827d24aa0f7826546097f1d417ae0f109c304..6f0936a9cada0efea58085d2eb62d6526b72a73c 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -458,7 +458,7 @@ extern void launch_job(struct job_record *job_ptr) launch_msg_ptr->open_mode = job_ptr->details->open_mode; launch_msg_ptr->acctg_freq = job_ptr->details->acctg_freq; - if (make_batch_job_cred(launch_msg_ptr)) { + if (make_batch_job_cred(launch_msg_ptr, job_ptr)) { error("aborting batch job %u", job_ptr->job_id); /* FIXME: This is a kludge, but this event indicates a serious * problem with OpenSSH and should never happen. 
We are @@ -524,9 +524,11 @@ _xduparray(uint16_t size, char ** array) * make_batch_job_cred - add a job credential to the batch_job_launch_msg * IN/OUT launch_msg_ptr - batch_job_launch_msg in which job_id, step_id, * uid and nodes have already been set + * IN job_ptr - pointer to job record * RET 0 or error code */ -extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr) +extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr, + struct job_record *job_ptr) { slurm_cred_arg_t cred_arg; @@ -534,6 +536,15 @@ extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr) cred_arg.stepid = launch_msg_ptr->step_id; cred_arg.uid = launch_msg_ptr->uid; cred_arg.hostlist = launch_msg_ptr->nodes; + if (job_ptr->details == NULL) + cred_arg.job_mem = 0; + else if (job_ptr->details->job_min_memory & MEM_PER_CPU) { + cred_arg.job_mem = job_ptr->details->job_min_memory; + cred_arg.job_mem &= (~MEM_PER_CPU); + cred_arg.job_mem *= job_ptr->alloc_lps[0]; + } else + cred_arg.job_mem = job_ptr->details->job_min_memory; + cred_arg.alloc_lps_cnt = 0; cred_arg.alloc_lps = NULL; diff --git a/src/slurmctld/job_scheduler.h b/src/slurmctld/job_scheduler.h index b40310137dcf06f569fa9b53d8370ce9c5b4e265..f09cb7e728393032185a04fc12a0a82320e0eb80 100644 --- a/src/slurmctld/job_scheduler.h +++ b/src/slurmctld/job_scheduler.h @@ -82,9 +82,11 @@ extern void launch_job(struct job_record *job_ptr); * make_batch_job_cred - add a job credential to the batch_job_launch_msg * IN/OUT launch_msg_ptr - batch_job_launch_msg in which job_id, step_id, * uid and nodes have already been set + * IN job_ptr - pointer to job record * RET 0 or error code */ -extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr); +extern int make_batch_job_cred(batch_job_launch_msg_t *launch_msg_ptr, + struct job_record *job_ptr); /* Print a job's dependency information based upon job_ptr->depend_list */ extern void print_job_dependency(struct job_record *job_ptr); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 0bc50f8e961967d620ce6f64b5b0ca3acd23f8bc..c8870005075f37573758074caa52e584a18fa832 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -1196,7 +1196,8 @@ extern int job_req_node_filter(struct job_record *job_ptr, FREE_NULL_BITMAP(feature_bitmap); if (slurmctld_conf.fast_schedule) { if ((detail_ptr->job_min_procs > config_ptr->cpus ) - || (detail_ptr->job_min_memory > config_ptr->real_memory) + || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + config_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; @@ -1213,7 +1214,8 @@ extern int job_req_node_filter(struct job_record *job_ptr, } } else { if ((detail_ptr->job_min_procs > node_ptr->cpus ) - || (detail_ptr->job_min_memory > node_ptr->real_memory) + || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + node_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > node_ptr->tmp_disk)) { bit_clear(avail_bitmap, i); continue; @@ -1284,7 +1286,8 @@ static int _build_node_list(struct job_record *job_ptr, config_filter = 0; if ((detail_ptr->job_min_procs > config_ptr->cpus ) - || (detail_ptr->job_min_memory > config_ptr->real_memory) + || ((detail_ptr->job_min_memory & (~MEM_PER_CPU)) > + config_ptr->real_memory) || (detail_ptr->job_min_tmp_disk > config_ptr->tmp_disk)) config_filter = 1; if (mc_ptr @@ -1391,7 +1394,8 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, node_con = 
node_record_table_ptr[i].config_ptr; if ((job_con->job_min_procs <= node_con->cpus) - && (job_con->job_min_memory <= node_con->real_memory) + && ((job_con->job_min_memory & (~MEM_PER_CPU)) <= + node_con->real_memory) && (job_con->job_min_tmp_disk <= node_con->tmp_disk)) job_ok = 1; if (mc_ptr @@ -1419,7 +1423,8 @@ static void _filter_nodes_in_set(struct node_set *node_set_ptr, node_ptr = &node_record_table_ptr[i]; if ((job_con->job_min_procs <= node_ptr->cpus) - && (job_con->job_min_memory <= node_ptr->real_memory) + && ((job_con->job_min_memory & (~MEM_PER_CPU)) <= + node_ptr->real_memory) && (job_con->job_min_tmp_disk <= node_ptr->tmp_disk)) job_ok = 1; if (mc_ptr diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index ed3d9d2f8c679f485ee1f570a96d135e883ab54e..3451467bd1dac315cf5750b53057bcbe0db5d79c 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -95,6 +95,7 @@ inline static void _slurm_rpc_complete_job_allocation(slurm_msg_t * msg); inline static void _slurm_rpc_complete_batch_script(slurm_msg_t * msg); inline static void _slurm_rpc_dump_conf(slurm_msg_t * msg); inline static void _slurm_rpc_dump_jobs(slurm_msg_t * msg); +inline static void _slurm_rpc_dump_job_single(slurm_msg_t * msg); inline static void _slurm_rpc_dump_nodes(slurm_msg_t * msg); inline static void _slurm_rpc_dump_partitions(slurm_msg_t * msg); inline static void _slurm_rpc_epilog_complete(slurm_msg_t * msg); @@ -149,6 +150,10 @@ void slurmctld_req (slurm_msg_t * msg) _slurm_rpc_dump_jobs(msg); slurm_free_job_info_request_msg(msg->data); break; + case REQUEST_JOB_INFO_SINGLE: + _slurm_rpc_dump_job_single(msg); + slurm_free_job_id_msg(msg->data); + break; case REQUEST_JOB_END_TIME: _slurm_rpc_end_time(msg); slurm_free_job_alloc_info_msg(msg->data); @@ -498,24 +503,47 @@ static int _make_step_cred(struct step_record *step_rec, slurm_cred_t *slurm_cred) { slurm_cred_arg_t cred_arg; + struct job_record* job_ptr = step_rec->job_ptr; - cred_arg.jobid = step_rec->job_ptr->job_id; + cred_arg.jobid = job_ptr->job_id; cred_arg.stepid = step_rec->step_id; - cred_arg.uid = step_rec->job_ptr->user_id; - cred_arg.job_mem = step_rec->job_ptr->details->job_min_memory; + cred_arg.uid = job_ptr->user_id; + cred_arg.job_mem = job_ptr->details->job_min_memory; cred_arg.task_mem = step_rec->mem_per_task; cred_arg.hostlist = step_rec->step_layout->node_list; - if(step_rec->job_ptr->details->shared == 0) - cred_arg.alloc_lps_cnt = 0; - else - cred_arg.alloc_lps_cnt = step_rec->job_ptr->alloc_lps_cnt; - if (cred_arg.alloc_lps_cnt > 0) { - cred_arg.alloc_lps = xmalloc(cred_arg.alloc_lps_cnt * + + cred_arg.alloc_lps_cnt = job_ptr->alloc_lps_cnt; + if ((cred_arg.alloc_lps_cnt > 0) && + bit_equal(job_ptr->node_bitmap, step_rec->step_node_bitmap)) { + cred_arg.alloc_lps = xmalloc(cred_arg.alloc_lps_cnt * sizeof(uint32_t)); - memcpy(cred_arg.alloc_lps, step_rec->job_ptr->alloc_lps, - cred_arg.alloc_lps_cnt*sizeof(uint32_t)); - } else + memcpy(cred_arg.alloc_lps, step_rec->job_ptr->alloc_lps, + cred_arg.alloc_lps_cnt*sizeof(uint32_t)); + } else if (cred_arg.alloc_lps_cnt > 0) { + /* Construct an array of allocated CPUs per node. + * Translate from array based upon job's allocation + * to array based upon nodes allocated to the step. 
*/ + int i, job_inx = -1, step_inx = -1; + int job_inx_target = job_ptr->node_cnt; + cred_arg.alloc_lps = xmalloc(cred_arg.alloc_lps_cnt * + sizeof(uint32_t)); + for (i=0; i<node_record_count; i++) { + if (!bit_test(job_ptr->node_bitmap, i)) + continue; + job_inx++; + if (!bit_test(step_rec->step_node_bitmap, i)) + continue; + step_inx++; + cred_arg.alloc_lps[step_inx] = + job_ptr->alloc_lps[job_inx]; + if (job_inx == job_inx_target) + break; + } + cred_arg.alloc_lps_cnt = step_inx + 1; + } else { + error("No resources allocated to job %u", job_ptr->job_id); cred_arg.alloc_lps = NULL; + } *slurm_cred = slurm_cred_create(slurmctld_config.cred_ctx, &cred_arg); @@ -697,8 +725,8 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) g_slurm_auth_get_uid(msg->auth_cred, NULL)); unlock_slurmctld(job_read_lock); END_TIMER2("_slurm_rpc_dump_jobs"); - debug2("_slurm_rpc_dump_jobs, size=%d %s", - dump_size, TIME_STR); +/* info("_slurm_rpc_dump_jobs, size=%d %s", */ +/* dump_size, TIME_STR); */ /* init response_msg structure */ slurm_msg_t_init(&response_msg); @@ -713,6 +741,44 @@ static void _slurm_rpc_dump_jobs(slurm_msg_t * msg) } } +/* _slurm_rpc_dump_job_single - process RPC for one job's state information */ +static void _slurm_rpc_dump_job_single(slurm_msg_t * msg) +{ + DEF_TIMERS; + char *dump = NULL; + int dump_size, rc; + slurm_msg_t response_msg; + job_id_msg_t *job_info_request_msg = (job_id_msg_t *) msg->data; + /* Locks: Read config job, write node (for hiding) */ + slurmctld_lock_t job_read_lock = { + READ_LOCK, READ_LOCK, NO_LOCK, WRITE_LOCK }; + uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, NULL); + + START_TIMER; + debug2("Processing RPC: REQUEST_JOB_INFO_SINGLE from uid=%u", + (unsigned int) uid); + lock_slurmctld(job_read_lock); + + rc = pack_one_job(&dump, &dump_size, job_info_request_msg->job_id, + g_slurm_auth_get_uid(msg->auth_cred, NULL)); + unlock_slurmctld(job_read_lock); + END_TIMER2("_slurm_rpc_dump_job_single"); +/* info("_slurm_rpc_dump_job_single, size=%d %s",dump_size, TIME_STR); */ + + /* init response_msg structure */ + if (rc != SLURM_SUCCESS) { + slurm_send_rc_msg(msg, rc); + } else { + slurm_msg_t_init(&response_msg); + response_msg.address = msg->address; + response_msg.msg_type = RESPONSE_JOB_INFO; + response_msg.data = dump; + response_msg.data_size = dump_size; + slurm_send_node_msg(msg->conn_fd, &response_msg); + } + xfree(dump); +} + /* _slurm_rpc_end_time - Process RPC for job end time */ static void _slurm_rpc_end_time(slurm_msg_t * msg) { @@ -767,7 +833,8 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg) (unsigned int) uid); lock_slurmctld(node_read_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if ((slurmctld_conf.private_data & PRIVATE_DATA_NODES) + && (!validate_super_user(uid))) { unlock_slurmctld(node_read_lock); error("Security violation, REQUEST_NODE_INFO RPC from uid=%d", uid); slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); @@ -816,7 +883,8 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg) part_req_msg = (part_info_request_msg_t *) msg->data; lock_slurmctld(part_read_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if ((slurmctld_conf.private_data & PRIVATE_DATA_PARTITIONS) + && (!validate_super_user(uid))) { unlock_slurmctld(part_read_lock); debug2("Security violation, PARTITION_INFO RPC from uid=%d", uid); slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING); @@ -862,7 +930,7 @@ static void _slurm_rpc_epilog_complete(slurm_msg_t * msg) debug2("Processing RPC: 
MESSAGE_EPILOG_COMPLETE uid=%u", (unsigned int) uid); lock_slurmctld(job_write_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if (!validate_super_user(uid)) { unlock_slurmctld(job_write_lock); error("Security violation, EPILOG_COMPLETE RPC from uid=%u", (unsigned int) uid); @@ -2212,7 +2280,8 @@ static void _slurm_rpc_node_select_info(slurm_msg_t * msg) debug2("Processing RPC: REQUEST_NODE_SELECT_INFO from uid=%u", (unsigned int) uid); lock_slurmctld(config_read_lock); - if (slurmctld_conf.private_data && !validate_super_user(uid)) { + if ((slurmctld_conf.private_data & PRIVATE_DATA_NODES) + && (!validate_super_user(uid))) { error_code = ESLURM_USER_ID_MISSING; error("Security violation, NODE_SELECT_INFO RPC from uid=u", (unsigned int) uid); @@ -2606,7 +2675,7 @@ int _launch_batch_step(job_desc_msg_t *job_desc_msg, uid_t uid, launch_msg_ptr->uid = uid; launch_msg_ptr->nodes = xstrdup(job_ptr->nodes); - if (make_batch_job_cred(launch_msg_ptr)) { + if (make_batch_job_cred(launch_msg_ptr, job_ptr)) { error("aborting batch step %u.%u", job_ptr->job_id, job_ptr->group_id); xfree(launch_msg_ptr->nodes); @@ -2861,10 +2930,13 @@ inline static void _slurm_rpc_accounting_update_msg(slurm_msg_t *msg) case ACCT_ADD_ASSOC: case ACCT_MODIFY_ASSOC: case ACCT_REMOVE_ASSOC: - rc = assoc_mgr_update_local_assocs( - object); + rc = assoc_mgr_update_local_assocs(object); break; + case ACCT_ADD_QOS: + case ACCT_REMOVE_QOS: case ACCT_UPDATE_NOTSET: + rc = assoc_mgr_update_local_qos(object); + break; default: error("unknown type set in update_object: %d", object->type); diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 3bfa240f37e410e4c198e6b2f35f80a8564a4b93..8ef6ed0d5186a62cc25723fce44961ccbf4e1fc0 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -331,7 +331,8 @@ struct job_details { uint16_t ntasks_per_node; /* number of tasks on each node */ /* job constraints: */ uint32_t job_min_procs; /* minimum processors per node */ - uint32_t job_min_memory; /* minimum memory per node, MB */ + uint32_t job_min_memory; /* minimum memory per node (MB) OR + * memory per allocated CPU | MEM_PER_CPU */ uint32_t job_min_tmp_disk; /* minimum tempdisk per node, MB */ char *err; /* pathname of job's stderr file */ char *in; /* pathname of job's stdin file */ @@ -469,7 +470,7 @@ struct step_record { time_t pre_sus_time; /* time step ran prior to last suspend */ time_t tot_sus_time; /* total time in suspended state */ bitstr_t *step_node_bitmap; /* bitmap of nodes allocated to job - step */ + * step */ uint16_t port; /* port for srun communications */ char *host; /* host for srun communications */ uint16_t batch_step; /* 1 if batch job step, 0 otherwise */ @@ -485,9 +486,9 @@ struct step_record { uint32_t exit_code; /* highest exit code from any task */ bitstr_t *exit_node_bitmap; /* bitmap of exited nodes */ jobacctinfo_t *jobacct; /* keep track of process info in the - step */ + * step */ slurm_step_layout_t *step_layout;/* info about how tasks are laid out - in the step */ + * in the step */ }; extern List job_list; /* list of job_record entries */ @@ -1210,6 +1211,20 @@ extern void pack_job (struct job_record *dump_job_ptr, Buf buffer); */ extern void pack_part (struct part_record *part_ptr, Buf buffer); +/* + * pack_one_job - dump information for one jobs in + * machine independent form (for network transmission) + * OUT buffer_ptr - the pointer is set to the allocated buffer. 
+ * OUT buffer_size - set to size of the buffer in bytes + * IN job_id - ID of job that we want info for + * IN uid - uid of user making request (for partition filtering) + * NOTE: the buffer at *buffer_ptr must be xfreed by the caller + * NOTE: change _unpack_job_desc_msg() in common/slurm_protocol_pack.c + * whenever the data format changes + */ +extern int pack_one_job(char **buffer_ptr, int *buffer_size, + uint32_t job_id, uid_t uid); + /* part_filter_clear - Clear the partition's hidden flag based upon a user's * group access. This must follow a call to part_filter_set() */ extern void part_filter_clear(void); @@ -1219,7 +1234,7 @@ extern void part_filter_clear(void); extern void part_filter_set(uid_t uid); /* part_fini - free all memory associated with partition records */ -void part_fini (void); +extern void part_fini (void); /* * purge_old_job - purge old job records. diff --git a/src/slurmctld/srun_comm.c b/src/slurmctld/srun_comm.c index 322c6dbb9e48aae7a0c703459d509e5884c84920..8bcfb519ba77fc11742f9f4a23aa096060d2820f 100644 --- a/src/slurmctld/srun_comm.c +++ b/src/slurmctld/srun_comm.c @@ -107,6 +107,28 @@ extern void srun_allocate (uint32_t job_id) } } +/* + * srun_allocate_abort - notify srun of a resource allocation failure + * IN job_id - id of the job allocated resource + */ +extern void srun_allocate_abort(struct job_record *job_ptr) +{ + if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node + && job_ptr->resp_host) { + slurm_addr * addr; + srun_job_complete_msg_t *msg_arg; + addr = xmalloc(sizeof(struct sockaddr_in)); + slurm_set_addr(addr, job_ptr->alloc_resp_port, + job_ptr->resp_host); + msg_arg = xmalloc(sizeof(srun_timeout_msg_t)); + msg_arg->job_id = job_ptr->job_id; + msg_arg->step_id = NO_VAL; + _srun_agent_launch(addr, job_ptr->alloc_node, + SRUN_JOB_COMPLETE, + msg_arg); + } +} + /* * srun_node_fail - notify srun of a node's failure * IN job_id - id of job to notify diff --git a/src/slurmctld/srun_comm.h b/src/slurmctld/srun_comm.h index 6e796100beaef65880d401b17d008daed19f9939..7858e5a2ba7f45a845f4e6948fcf7d45d36e30f0 100644 --- a/src/slurmctld/srun_comm.h +++ b/src/slurmctld/srun_comm.h @@ -49,6 +49,12 @@ */ extern void srun_allocate (uint32_t job_id); +/* + * srun_allocate_abort - notify srun of a resource allocation failure + * IN job_id - id of the job allocated resource + */ +extern void srun_allocate_abort(struct job_record *job_ptr); + /* * srun_exec - request that srun execute a specific command * and route it's output to stdout diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index cef57533058c511793dfa88d8698192de4a09b94..0fe8834bc8941ff758e6d2d2b596cbf9ab9ca822 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * step_mgr.c - manage the job step information of slurm - * $Id: step_mgr.c 13858 2008-04-11 19:29:30Z jette $ + * $Id: step_mgr.c 14548 2008-07-17 22:00:36Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
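The hunks above consistently mask job_min_memory with (~MEM_PER_CPU) before comparing or reporting it, matching the new slurmctld.h comment: the field now carries either a per-node limit in MB or a per-CPU limit with the MEM_PER_CPU flag set. A minimal stand-alone sketch of that decoding (not SLURM source; the 0x80000000 flag value and the cpus_on_node argument are assumptions for illustration):

#include <stdint.h>
#include <stdio.h>

#define MEM_PER_CPU 0x80000000	/* assumed high-bit flag, per the (~MEM_PER_CPU) masks above */

/* Effective per-node memory limit (MB) for a job that was allocated
 * cpus_on_node CPUs on the node in question. */
uint32_t job_mem_on_node(uint32_t job_min_memory, uint32_t cpus_on_node)
{
	if (job_min_memory & MEM_PER_CPU)	/* per-CPU style limit */
		return (job_min_memory & ~MEM_PER_CPU) * cpus_on_node;
	return job_min_memory;			/* per-node style limit */
}

int main(void)
{
	/* e.g. a 512 MB per-CPU limit with 4 CPUs allocated on the node */
	printf("%u MB\n", job_mem_on_node(512 | MEM_PER_CPU, 4));
	return 0;
}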
@@ -1132,7 +1132,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, (job_ptr->part_ptr->hidden)) continue; - if (slurmctld_conf.private_data + if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) continue; @@ -1156,7 +1156,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, (job_ptr->part_ptr) && (job_ptr->part_ptr->hidden)) job_ptr = NULL; - else if (slurmctld_conf.private_data + else if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) job_ptr = NULL; @@ -1180,7 +1180,7 @@ extern int pack_ctld_job_step_info_response_msg(uint32_t job_id, && (job_ptr->part_ptr) && (job_ptr->part_ptr->hidden)) job_ptr = NULL; - else if (slurmctld_conf.private_data + else if ((slurmctld_conf.private_data & PRIVATE_DATA_JOBS) && (job_ptr->user_id != uid) && !validate_super_user(uid)) job_ptr = NULL; diff --git a/src/slurmd/slurmd/Makefile.am b/src/slurmd/slurmd/Makefile.am index ec4cebd4684813a85c4c13c89c9a6f1d46b6653a..bab7c9b772dcb399a69144e9170ee3c6b65c75a1 100644 --- a/src/slurmd/slurmd/Makefile.am +++ b/src/slurmd/slurmd/Makefile.am @@ -11,8 +11,7 @@ INCLUDES = -I$(top_srcdir) slurmd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o -ldl \ + $(top_builddir)/src/api/libslurm.o -ldl \ $(PLPA_LIBS) slurmd_test_LDADD = $(slurmd_LDADD) diff --git a/src/slurmd/slurmd/Makefile.in b/src/slurmd/slurmd/Makefile.in index d92d8a3944f93bb27f00b91abdb055302541c6b9..9a70905e690ef7b4b1af2279539bef6b37f88829 100644 --- a/src/slurmd/slurmd/Makefile.in +++ b/src/slurmd/slurmd/Makefile.in @@ -81,16 +81,14 @@ am_slurmd_OBJECTS = $(am__objects_1) config.$(OBJEXT) slurmd_OBJECTS = $(am_slurmd_OBJECTS) am__DEPENDENCIES_1 = slurmd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o $(am__DEPENDENCIES_1) + $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) slurmd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(slurmd_LDFLAGS) \ $(LDFLAGS) -o $@ am_slurmd_test_OBJECTS = $(am__objects_1) testconfig.$(OBJEXT) slurmd_test_OBJECTS = $(am_slurmd_test_OBJECTS) am__DEPENDENCIES_2 = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o $(am__DEPENDENCIES_1) + $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) slurmd_test_DEPENDENCIES = $(am__DEPENDENCIES_2) slurmd_test_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -283,8 +281,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) slurmd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libcommon.o -ldl \ + $(top_builddir)/src/api/libslurm.o -ldl \ $(PLPA_LIBS) slurmd_test_LDADD = $(slurmd_LDADD) diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index dab7c08eebe584255b48a26ca40771625c0fc7a6..d5314fced80a6853b4575659394300357f6feabd 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -612,46 +612,40 @@ _forkexec_slurmstepd(slurmd_step_type_t type, void *req, */ static int _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, - int tasks_to_launch, hostset_t *step_hset) + int node_id, hostset_t *step_hset) { slurm_cred_arg_t arg; 
hostset_t hset = NULL; - bool user_ok = _slurm_authorized_user(uid); + bool user_ok = _slurm_authorized_user(uid); + bool verified = true; int host_index = -1; int rc; slurm_cred_t cred = req->cred; uint32_t jobid = req->job_id; uint32_t stepid = req->job_step_id; - + int tasks_to_launch = req->tasks_to_launch[node_id]; /* * First call slurm_cred_verify() so that all valid * credentials are checked */ if ((rc = slurm_cred_verify(conf->vctx, cred, &arg)) < 0) { - if (!user_ok) { + verified = false; + if (!user_ok) return SLURM_ERROR; - } else { + else { debug("_check_job_credential slurm_cred_verify failed:" " %m, but continuing anyway."); } } - /* Overwrite any memory limits in the RPC with - * contents of the credential */ - req->job_mem = arg.job_mem; - req->task_mem = arg.task_mem; - - /* - * If uid is the slurm user id or root, do not bother - * performing validity check of the credential - */ - if (user_ok) { + /* If uid is the SlurmUser or root and the credential is bad, + * then do not attempt validating the credential */ + if (!verified) { *step_hset = NULL; if (rc >= 0) { if ((hset = hostset_create(arg.hostlist))) *step_hset = hset; - xfree(arg.hostlist); - xfree(arg.alloc_lps); + slurm_cred_free_args(&arg); } return SLURM_SUCCESS; } @@ -684,12 +678,11 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, } if ((arg.alloc_lps_cnt > 0) && (tasks_to_launch > 0)) { - host_index = hostset_find(hset, conf->node_name); - /* Left in here for debugging purposes */ #if(0) - if(host_index >= 0) + /* Left for debugging purposes */ + if (host_index >= 0) info(" cons_res %u alloc_lps_cnt %u " "task[%d] = %u = task_to_launch %d host %s ", arg.jobid, arg.alloc_lps_cnt, host_index, @@ -698,35 +691,53 @@ _check_job_credential(launch_tasks_request_msg_t *req, uid_t uid, #endif if (host_index < 0) { - error("job cr credential invalid host_index %d for job %u", - host_index, arg.jobid); + error("job cr credential invalid host_index %d for " + "job %u", host_index, arg.jobid); goto fail; } - + if (host_index > arg.alloc_lps_cnt) + error("host_index > alloc_lps_cnt in credential"); + else if (arg.alloc_lps[host_index] == 0) + error("cons_res: zero processors allocated to step"); if (tasks_to_launch > arg.alloc_lps[host_index]) { - error("cons_res: More than one tasks per logical " + /* This is expected with the --overcommit option */ + verbose("cons_res: More than one tasks per logical " "processor (%d > %u) on host [%u.%u %ld %s] ", tasks_to_launch, arg.alloc_lps[host_index], arg.jobid, arg.stepid, (long) arg.uid, arg.hostlist); - error(" cons_res: Use task/affinity plug-in to bind " + verbose("cons_res: Use task/affinity plug-in to bind " "the tasks to the allocated resources"); } } + /* Overwrite any memory limits in the RPC with contents of the + * memory limit within the credential. + * Reset the CPU count on this node to correct value. 
*/ + if (arg.job_mem & MEM_PER_CPU) { + req->job_mem = arg.job_mem & (~MEM_PER_CPU); + if ((host_index >= 0) && (host_index < arg.alloc_lps_cnt) && + (arg.alloc_lps[host_index] > 0)) + req->job_mem *= arg.alloc_lps[host_index]; + } else + req->job_mem = arg.job_mem; + req->task_mem = arg.task_mem; /* Defunct */ + if ((host_index >= 0) && (host_index < arg.alloc_lps_cnt)) + req->cpus_allocated[node_id] = arg.alloc_lps[host_index]; +#if 0 + info("mem orig:%u cpus:%u limit:%u", + arg.job_mem, arg.alloc_lps[host_index], req->job_mem); +#endif + *step_hset = hset; - xfree(arg.hostlist); - arg.alloc_lps_cnt = 0; - xfree(arg.alloc_lps); + slurm_cred_free_args(&arg); return SLURM_SUCCESS; fail: if (hset) hostset_destroy(hset); *step_hset = NULL; - xfree(arg.hostlist); - arg.alloc_lps_cnt = 0; - xfree(arg.alloc_lps); + slurm_cred_free_args(&arg); slurm_seterrno_ret(ESLURMD_INVALID_JOB_CREDENTIAL); } @@ -767,8 +778,7 @@ _rpc_launch_tasks(slurm_msg_t *msg) req->job_step_id, req->uid, req->gid, host, port); first_job_run = !slurm_cred_jobid_cached(conf->vctx, req->job_id); - if (_check_job_credential(req, req_uid, req->tasks_to_launch[nodeid], - &step_hset) < 0) { + if (_check_job_credential(req, req_uid, nodeid, &step_hset) < 0) { errnum = errno; error("Invalid job credential from %ld@%s: %m", (long) req_uid, host); @@ -809,7 +819,9 @@ _rpc_launch_tasks(slurm_msg_t *msg) job_limits_ptr->job_id = req->job_id; list_append(job_limits_list, job_limits_ptr); } - job_limits_ptr->job_mem = req->job_mem; /* reset limit */ + /* reset memory limit based upon value calculated in + * _check_job_credential() above */ + job_limits_ptr->job_mem = req->job_mem; slurm_mutex_unlock(&job_limits_mutex); } @@ -920,6 +932,28 @@ _get_user_env(batch_job_launch_msg_t *req) xfree(pwd_buf); } +/* The RPC currently contains a memory size limit, but we load the + * value from the job credential to be certain it has not been + * altered by the user */ +static void +_set_batch_job_limits(slurm_msg_t *msg) +{ + slurm_cred_arg_t arg; + batch_job_launch_msg_t *req = (batch_job_launch_msg_t *)msg->data; + + if (slurm_cred_get_args(req->cred, &arg) != SLURM_SUCCESS) + return; + + if (arg.job_mem & MEM_PER_CPU) { + req->job_mem = arg.job_mem & (~MEM_PER_CPU); + if (arg.alloc_lps_cnt > 1) + req->job_mem *= arg.alloc_lps_cnt; + } else + req->job_mem = arg.job_mem; + + slurm_cred_free_args(&arg); +} + static void _rpc_batch_job(slurm_msg_t *msg) { @@ -987,6 +1021,8 @@ _rpc_batch_job(slurm_msg_t *msg) goto done; } } + _get_user_env(req); + _set_batch_job_limits(msg); /* Since job could have been killed while the prolog was * running (especially on BlueGene, which can take minutes @@ -998,7 +1034,6 @@ _rpc_batch_job(slurm_msg_t *msg) rc = ESLURMD_CREDENTIAL_REVOKED; /* job already ran */ goto done; } - _get_user_env(req); slurm_mutex_lock(&launch_mutex); if (req->step_id == SLURM_BATCH_SCRIPT) diff --git a/src/slurmd/slurmstepd/Makefile.am b/src/slurmd/slurmstepd/Makefile.am index 4e344a18894d1abedda646c9065cd43f59646fcf..4c090b9bbfa4eed879b01b91d98459566c9f668b 100644 --- a/src/slurmd/slurmstepd/Makefile.am +++ b/src/slurmd/slurmstepd/Makefile.am @@ -9,9 +9,7 @@ INCLUDES = -I$(top_srcdir) slurmstepd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libspank.la \ - $(top_builddir)/src/common/libcommon.o -ldl\ + $(top_builddir)/src/api/libslurm.o -ldl\ $(PLPA_LIBS) $(PAM_LIBS) $(UTIL_LIBS) slurmstepd_SOURCES = \ diff --git a/src/slurmd/slurmstepd/Makefile.in 
b/src/slurmd/slurmstepd/Makefile.in index 5bc200c7e0c67d0ed4789f7e7d244e1338141ba8..61bdc4cb7ce4a5015c309c391fc67d003d070e90 100644 --- a/src/slurmd/slurmstepd/Makefile.in +++ b/src/slurmd/slurmstepd/Makefile.in @@ -81,9 +81,7 @@ am_slurmstepd_OBJECTS = slurmstepd.$(OBJEXT) mgr.$(OBJEXT) \ slurmstepd_OBJECTS = $(am_slurmstepd_OBJECTS) am__DEPENDENCIES_1 = slurmstepd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libspank.la \ - $(top_builddir)/src/common/libcommon.o $(am__DEPENDENCIES_1) \ + $(top_builddir)/src/api/libslurm.o $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) slurmstepd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -276,9 +274,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) slurmstepd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libeio.la \ - $(top_builddir)/src/common/libspank.la \ - $(top_builddir)/src/common/libcommon.o -ldl\ + $(top_builddir)/src/api/libslurm.o -ldl\ $(PLPA_LIBS) $(PAM_LIBS) $(UTIL_LIBS) slurmstepd_SOURCES = \ diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 62862620cf54d013449a41074ca1faa593a8dce4..4b5ef4d48bdce4d7bc9131236948cfa17c148f1c 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 14238 2008-06-11 21:54:28Z jette $ + * $Id: mgr.c 14504 2008-07-14 17:38:53Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -1801,7 +1801,7 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, break; } else if (rc == 0) { sleep(1); - if ((--max_wait) == 0) { + if ((--max_wait) <= 0) { killpg(cpid, SIGKILL); opt = 0; } @@ -1810,7 +1810,7 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, break; } } - /* Insure that all child processes get killed */ + /* Insure that all child processes get killed, one last time */ killpg(cpid, SIGKILL); slurm_container_signal(job->cont_id, SIGKILL); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index e00850f62d931a769b991778393920e2aeaed062..0f82a07c7419623105e3ecc5049446d0259946a4 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.c - slurmd_job_t routines - * $Id: slurmstepd_job.c 13755 2008-04-01 19:12:53Z jette $ + * $Id: slurmstepd_job.c 14546 2008-07-17 21:03:59Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
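The mgr.c hunk above changes the script time-out test from "== 0" to "<= 0", so the SIGKILL is not skipped when max_wait starts at or below zero. A stand-alone sketch of that wait-then-kill pattern (a hypothetical helper, not the slurmstepd source; it assumes the child was made its own process-group leader with setpgid()):

#include <errno.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

/* Poll for the child once per second; after max_wait seconds SIGKILL its
 * process group and then block until it has been reaped. */
int wait_then_kill(pid_t cpid, int max_wait)
{
	int status = 0, options = WNOHANG;

	while (1) {
		pid_t rc = waitpid(cpid, &status, options);
		if (rc > 0)
			break;				/* child reaped */
		if ((rc < 0) && (errno != EINTR))
			break;				/* waitpid error */
		if (rc == 0) {
			sleep(1);
			if (--max_wait <= 0) {		/* "<=" so a zero budget still kills */
				killpg(cpid, SIGKILL);
				options = 0;		/* switch to a blocking wait */
			}
		}
	}
	killpg(cpid, SIGKILL);	/* one last sweep for any stragglers */
	return status;
}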
@@ -213,6 +213,7 @@ job_create(launch_tasks_request_msg_t *msg) job->mem_bind_type = msg->mem_bind_type; job->mem_bind = xstrdup(msg->mem_bind); job->ckpt_path = xstrdup(msg->ckpt_path); + job->cpus_per_task = msg->cpus_per_task; job->env = _array_copy(msg->envc, msg->env); job->eio = eio_handle_create(); @@ -325,6 +326,7 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->state = SLURMSTEPD_STEP_STARTING; job->pwd = pwd; + job->cpus = msg->cpus_per_node[0]; job->ntasks = 1; job->nprocs = msg->nprocs; job->jobid = msg->job_id; @@ -364,7 +366,8 @@ job_batch_job_create(batch_job_launch_msg_t *msg) job->envtp->mem_bind_type = 0; job->envtp->mem_bind = NULL; job->envtp->ckpt_path = NULL; - + job->cpus_per_task = msg->cpus_per_node[0]; + srun = srun_info_create(NULL, NULL, NULL); list_append(job->sruns, (void *) srun); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 5b0a653efb9911c483403ec13d1c52a3ef2dde08..5c1419d659faddc08c2589ca374e377125429cbe 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.h slurmd_job_t definition - * $Id: slurmstepd_job.h 13672 2008-03-19 23:10:58Z jette $ + * $Id: slurmstepd_job.h 14546 2008-07-17 21:03:59Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -116,6 +116,7 @@ typedef struct slurmd_job { uint32_t nprocs; /* total number of processes in current job */ uint32_t nodeid; /* relative position of this node in job */ uint32_t ntasks; /* number of tasks on *this* node */ + uint32_t cpus_per_task; /* number of cpus desired per task */ uint32_t debug; /* debug level for job slurmd */ uint32_t job_mem; /* MB of memory reserved for the job */ uint32_t task_mem; /* MB of memory reserved for each task */ diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 339ff6c4aa533266134e2ba2358073b280343bb2..087e0bcc83a3f44735b7e30a86a2a0c0252b106e 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -357,7 +357,7 @@ exec_task(slurmd_job_t *job, int i, int waitfd) if (job->multi_prog && task->argv[0]) { /* - * Normally the client (srun/slauch) expands the command name + * Normally the client (srun) expands the command name * to a fully qualified path, but in --multi-prog mode it * is left up to the server to search the PATH for the * executable. 
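The slurmstepd_job hunks above carry a per-task CPU count (cpus_per_task) into slurmd_job_t for both normal and batch steps. As an illustration of what that makes visible to plugins, here is a hedged sketch of a SPANK plugin reading the value through spank_get_item(); the plugin name is hypothetical and it assumes an item named S_STEP_CPUS_PER_TASK that yields a uint32_t:

#include <stdint.h>
#include <slurm/spank.h>

SPANK_PLUGIN(cpus_per_task_demo, 1);

/* Log the step's CPUs-per-task value from within each task, if available. */
int slurm_spank_task_init(spank_t sp, int ac, char **av)
{
	uint32_t cpus_per_task = 0;

	if (spank_get_item(sp, S_STEP_CPUS_PER_TASK, &cpus_per_task)
	    != ESPANK_SUCCESS)
		return 0;		/* item unavailable; nothing to report */

	slurm_info("running with %u CPU(s) per task", cpus_per_task);
	return 0;
}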
diff --git a/src/slurmdbd/Makefile.am b/src/slurmdbd/Makefile.am index c8c9e1613671989107f1dbf3caa6645b2aecff62..e2f99b404ffe6c4d0cc8e16db2f2efb806fc4534 100644 --- a/src/slurmdbd/Makefile.am +++ b/src/slurmdbd/Makefile.am @@ -10,7 +10,7 @@ sbin_PROGRAMS = slurmdbd slurmdbd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libcommon.o -ldl + $(top_builddir)/src/api/libslurm.o -ldl slurmdbd_SOURCES = \ diff --git a/src/slurmdbd/Makefile.in b/src/slurmdbd/Makefile.in index 450be380c18e81e7dfe079721567815f6940f044..4da8d7e0e0bf0a897f913f73a60a1e07fdba5ff2 100644 --- a/src/slurmdbd/Makefile.in +++ b/src/slurmdbd/Makefile.in @@ -75,7 +75,7 @@ am_slurmdbd_OBJECTS = agent.$(OBJEXT) proc_req.$(OBJEXT) \ read_config.$(OBJEXT) rpc_mgr.$(OBJEXT) slurmdbd.$(OBJEXT) slurmdbd_OBJECTS = $(am_slurmdbd_OBJECTS) slurmdbd_DEPENDENCIES = $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libcommon.o + $(top_builddir)/src/api/libslurm.o slurmdbd_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(slurmdbd_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -268,7 +268,7 @@ CLEANFILES = core.* INCLUDES = -I$(top_srcdir) slurmdbd_LDADD = \ $(top_builddir)/src/common/libdaemonize.la \ - $(top_builddir)/src/common/libcommon.o -ldl + $(top_builddir)/src/api/libslurm.o -ldl slurmdbd_SOURCES = \ agent.c \ diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index 21952ea416774ec3423a65223beeffa18a04f764..a563310262ae97641ad0822259cd44add29d8126 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -55,18 +55,30 @@ static int _add_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _add_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _add_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _add_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _cluster_procs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); -static int _get_accounts(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer); -static int _get_jobs_cond(void *db_conn, Buf in_buffer, Buf *out_buffer); +static int _get_accounts(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_assocs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_clusters(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_jobs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_jobs_cond(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_txn(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _get_usage(uint16_t type, void *db_conn, - Buf in_buffer, Buf *out_buffer); -static int _get_users(void *db_conn, Buf in_buffer, Buf *out_buffer); + Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _get_users(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _flush_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static void *_init_conn(Buf in_buffer, Buf *out_buffer, uint32_t *uid); @@ -99,6 +111,8 @@ static int 
_remove_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _remove_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); +static int _remove_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _remove_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid); static int _roll_usage(void *db_conn, @@ -153,6 +167,9 @@ proc_req(void **db_conn, slurm_fd orig_fd, rc = _add_clusters(*db_conn, in_buffer, out_buffer, uid); break; + case DBD_ADD_QOS: + rc = _add_qos(*db_conn, in_buffer, out_buffer, uid); + break; case DBD_ADD_USERS: rc = _add_users(*db_conn, in_buffer, out_buffer, uid); break; @@ -161,27 +178,36 @@ proc_req(void **db_conn, slurm_fd orig_fd, in_buffer, out_buffer, uid); break; case DBD_GET_ACCOUNTS: - rc = _get_accounts(*db_conn, in_buffer, out_buffer); + rc = _get_accounts(*db_conn, + in_buffer, out_buffer, uid); break; case DBD_GET_ASSOCS: - rc = _get_assocs(*db_conn, in_buffer, out_buffer); + rc = _get_assocs(*db_conn, in_buffer, out_buffer, uid); break; case DBD_GET_ASSOC_USAGE: case DBD_GET_CLUSTER_USAGE: rc = _get_usage(msg_type, *db_conn, - in_buffer, out_buffer); + in_buffer, out_buffer, uid); break; case DBD_GET_CLUSTERS: - rc = _get_clusters(*db_conn, in_buffer, out_buffer); + rc = _get_clusters(*db_conn, + in_buffer, out_buffer, uid); break; case DBD_GET_JOBS: - rc = _get_jobs(*db_conn, in_buffer, out_buffer); + rc = _get_jobs(*db_conn, in_buffer, out_buffer, uid); break; case DBD_GET_JOBS_COND: - rc = _get_jobs_cond(*db_conn, in_buffer, out_buffer); + rc = _get_jobs_cond(*db_conn, + in_buffer, out_buffer, uid); + break; + case DBD_GET_QOS: + rc = _get_qos(*db_conn, in_buffer, out_buffer, uid); + break; + case DBD_GET_TXN: + rc = _get_txn(*db_conn, in_buffer, out_buffer, uid); break; case DBD_GET_USERS: - rc = _get_users(*db_conn, in_buffer, out_buffer); + rc = _get_users(*db_conn, in_buffer, out_buffer, uid); break; case DBD_FLUSH_JOBS: rc = _flush_jobs(*db_conn, in_buffer, out_buffer, uid); @@ -253,6 +279,9 @@ proc_req(void **db_conn, slurm_fd orig_fd, rc = _remove_clusters(*db_conn, in_buffer, out_buffer, uid); break; + case DBD_REMOVE_QOS: + rc = _remove_qos(*db_conn, in_buffer, out_buffer, uid); + break; case DBD_REMOVE_USERS: rc = _remove_users(*db_conn, in_buffer, out_buffer, uid); @@ -513,6 +542,42 @@ end_it: *out_buffer = make_dbd_rc_msg(rc, comment, DBD_ADD_CLUSTERS); return rc; } + +static int _add_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + int rc = SLURM_SUCCESS; + dbd_list_msg_t *get_msg = NULL; + char *comment = NULL; + + debug2("DBD_ADD_QOS: called"); + if(*uid != slurmdbd_conf->slurm_user_id + && (assoc_mgr_get_admin_level(db_conn, *uid) + < ACCT_ADMIN_SUPER_USER)) { + comment = "Your user doesn't have privilege to preform this action"; + error("%s", comment); + rc = ESLURM_ACCESS_DENIED; + goto end_it; + } + + if (slurmdbd_unpack_list_msg(DBD_ADD_QOS, &get_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_ADD_QOS message"; + error("%s", comment); + rc = SLURM_ERROR; + goto end_it; + } + + rc = acct_storage_g_add_qos(db_conn, *uid, get_msg->my_list); + if(rc != SLURM_SUCCESS) + comment = "Failed to add qos."; + +end_it: + slurmdbd_free_list_msg(get_msg); + *out_buffer = make_dbd_rc_msg(rc, comment, DBD_ADD_QOS); + return rc; +} + static int _add_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid) { @@ -596,7 +661,8 @@ end_it: return rc; } -static int _get_accounts(void *db_conn, Buf in_buffer, Buf 
*out_buffer) +static int _get_accounts(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -625,7 +691,8 @@ static int _get_accounts(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_assocs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -655,7 +722,8 @@ static int _get_assocs(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_clusters(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -685,7 +753,8 @@ static int _get_clusters(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_jobs(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_get_jobs_msg_t *get_jobs_msg = NULL; dbd_list_msg_t list_msg; @@ -703,7 +772,12 @@ static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) } memset(&sacct_params, 0, sizeof(sacct_parameters_t)); - sacct_params.opt_cluster = get_jobs_msg->cluster_name; + if (get_jobs_msg->cluster_name) { + sacct_params.opt_cluster_list = list_create(NULL); + list_append(sacct_params.opt_cluster_list, + get_jobs_msg->cluster_name); + } + sacct_params.opt_uid = -1; if(get_jobs_msg->user) { struct passwd *pw = NULL; @@ -717,6 +791,8 @@ static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) &sacct_params); slurmdbd_free_get_jobs_msg(get_jobs_msg); + if(sacct_params.opt_cluster_list) + list_destroy(sacct_params.opt_cluster_list); *out_buffer = init_buf(1024); pack16((uint16_t) DBD_GOT_JOBS, *out_buffer); @@ -727,7 +803,8 @@ static int _get_jobs(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } -static int _get_jobs_cond(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_jobs_cond(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *cond_msg = NULL; dbd_list_msg_t list_msg; @@ -756,8 +833,66 @@ static int _get_jobs_cond(void *db_conn, Buf in_buffer, Buf *out_buffer) return SLURM_SUCCESS; } +static int _get_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + dbd_cond_msg_t *cond_msg = NULL; + dbd_list_msg_t list_msg; + char *comment = NULL; + + debug2("DBD_GET_QOS: called"); + if (slurmdbd_unpack_cond_msg(DBD_GET_QOS, &cond_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_GET_QOS message"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, comment, + DBD_GET_QOS); + return SLURM_ERROR; + } + + list_msg.my_list = acct_storage_g_get_qos(db_conn, cond_msg->cond); + slurmdbd_free_cond_msg(DBD_GET_QOS, cond_msg); + + *out_buffer = init_buf(1024); + pack16((uint16_t) DBD_GOT_QOS, *out_buffer); + slurmdbd_pack_list_msg(DBD_GOT_QOS, &list_msg, *out_buffer); + if(list_msg.my_list) + list_destroy(list_msg.my_list); + + return SLURM_SUCCESS; +} + +static int _get_txn(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + dbd_cond_msg_t *cond_msg = NULL; + dbd_list_msg_t list_msg; + char *comment = NULL; + + debug2("DBD_GET_TXN: called"); + if (slurmdbd_unpack_cond_msg(DBD_GET_TXN, &cond_msg, in_buffer) != + SLURM_SUCCESS) { + comment = "Failed to unpack DBD_GET_TXN message"; + 
error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, comment, + DBD_GET_TXN); + return SLURM_ERROR; + } + + list_msg.my_list = acct_storage_g_get_txn(db_conn, cond_msg->cond); + slurmdbd_free_cond_msg(DBD_GET_TXN, cond_msg); + + *out_buffer = init_buf(1024); + pack16((uint16_t) DBD_GOT_TXN, *out_buffer); + slurmdbd_pack_list_msg(DBD_GOT_TXN, &list_msg, *out_buffer); + if(list_msg.my_list) + list_destroy(list_msg.my_list); + + return SLURM_SUCCESS; +} + static int _get_usage(uint16_t type, void *db_conn, - Buf in_buffer, Buf *out_buffer) + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_usage_msg_t *get_msg = NULL; dbd_usage_msg_t got_msg; @@ -813,7 +948,8 @@ static int _get_usage(uint16_t type, void *db_conn, return SLURM_SUCCESS; } -static int _get_users(void *db_conn, Buf in_buffer, Buf *out_buffer) +static int _get_users(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) { dbd_cond_msg_t *get_msg = NULL; dbd_list_msg_t list_msg; @@ -890,10 +1026,13 @@ static void *_init_conn(Buf in_buffer, Buf *out_buffer, uint32_t *uid) rc = SLURM_ERROR; goto end_it; } - if (init_msg->version != SLURMDBD_VERSION) { + if ((init_msg->version < SLURMDBD_VERSION_MIN) || + (init_msg->version > SLURMDBD_VERSION)) { comment = "Incompatable RPC version"; - error("Incompatable RPC version (%d != %d)", - init_msg->version, SLURMDBD_VERSION); + error("Incompatable RPC version received " + "(%u not between %d and %d)", + init_msg->version, + SLURMDBD_VERSION_MIN, SLURMDBD_VERSION); goto end_it; } *uid = init_msg->uid; @@ -1725,6 +1864,67 @@ static int _remove_clusters(void *db_conn, return rc; } +static int _remove_qos(void *db_conn, + Buf in_buffer, Buf *out_buffer, uint32_t *uid) +{ + int rc = SLURM_SUCCESS; + dbd_cond_msg_t *get_msg = NULL; + dbd_list_msg_t list_msg; + char *comment = NULL; + + debug2("DBD_REMOVE_QOS: called"); + + if(*uid != slurmdbd_conf->slurm_user_id + && assoc_mgr_get_admin_level(db_conn, *uid) + < ACCT_ADMIN_SUPER_USER) { + comment = "Your user doesn't have privilege to preform this action"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(ESLURM_ACCESS_DENIED, + comment, DBD_REMOVE_QOS); + + return ESLURM_ACCESS_DENIED; + } + + if (slurmdbd_unpack_cond_msg(DBD_REMOVE_QOS, &get_msg, + in_buffer) != SLURM_SUCCESS) { + comment = "Failed to unpack DBD_REMOVE_QOS message"; + error("%s", comment); + *out_buffer = make_dbd_rc_msg(SLURM_ERROR, + comment, DBD_REMOVE_QOS); + return SLURM_ERROR; + } + + if(!(list_msg.my_list = acct_storage_g_remove_qos( + db_conn, *uid, get_msg->cond))) { + if(errno == ESLURM_ACCESS_DENIED) { + comment = "Your user doesn't have privilege to preform this action"; + rc = ESLURM_ACCESS_DENIED; + } else if(errno == SLURM_ERROR) { + comment = "Something was wrong with your query"; + rc = SLURM_ERROR; + } else if(errno == SLURM_NO_CHANGE_IN_DATA) { + comment = "Request didn't affect anything"; + rc = SLURM_SUCCESS; + } else { + comment = "Unknown issue"; + rc = SLURM_ERROR; + } + error("%s", comment); + slurmdbd_free_cond_msg(DBD_REMOVE_QOS, get_msg); + *out_buffer = make_dbd_rc_msg(rc, comment, DBD_REMOVE_QOS); + return rc; + } + + slurmdbd_free_cond_msg(DBD_REMOVE_QOS, get_msg); + *out_buffer = init_buf(1024); + pack16((uint16_t) DBD_GOT_LIST, *out_buffer); + slurmdbd_pack_list_msg(DBD_GOT_LIST, &list_msg, *out_buffer); + if(list_msg.my_list) + list_destroy(list_msg.my_list); + + return rc; +} + static int _remove_users(void *db_conn, Buf in_buffer, Buf *out_buffer, uint32_t *uid) { diff --git a/src/smap/Makefile.am 
b/src/smap/Makefile.am index 2ec5094dc8aeaba7eded53b829e67d8ededa4349..24f328c3d6426e375797e86e513315200221c1b2 100644 --- a/src/smap/Makefile.am +++ b/src/smap/Makefile.am @@ -15,7 +15,7 @@ bin_PROGRAMS = smap smap_LDADD = \ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ - $(top_builddir)/src/api/libslurmhelper.la + $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = smap.h smap_SOURCES = smap.c \ diff --git a/src/smap/Makefile.in b/src/smap/Makefile.in index 06f6ec162b72ffa78e71b41610aa42109a8e26e1..ac8923c35f48823846d80e24d509b27338d4470f 100644 --- a/src/smap/Makefile.in +++ b/src/smap/Makefile.in @@ -88,7 +88,7 @@ am__EXTRA_smap_SOURCES_DIST = smap.h smap.c job_functions.c \ opts.c smap_OBJECTS = $(am_smap_OBJECTS) @HAVE_SOME_CURSES_TRUE@smap_DEPENDENCIES = $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurm.o smap_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(smap_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -282,7 +282,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) @HAVE_SOME_CURSES_TRUE@smap_LDADD = \ @HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_SOME_CURSES_TRUE@ $(top_builddir)/src/api/libslurm.o -ldl @HAVE_SOME_CURSES_TRUE@noinst_HEADERS = smap.h @HAVE_SOME_CURSES_TRUE@smap_SOURCES = smap.c \ diff --git a/src/squeue/Makefile.am b/src/squeue/Makefile.am index 205a78beab737f6617e107b8227cc293645da420..fb8f5d6ceb747024a022b3e092d28b7695434860 100644 --- a/src/squeue/Makefile.am +++ b/src/squeue/Makefile.am @@ -7,7 +7,7 @@ INCLUDES = -I$(top_srcdir) bin_PROGRAMS = squeue -squeue_LDADD = $(top_builddir)/src/api/libslurmhelper.la +squeue_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = squeue.h print.h squeue_SOURCES = squeue.c print.c opts.c sort.c diff --git a/src/squeue/Makefile.in b/src/squeue/Makefile.in index 1f178ae7dba09fdf9e72760d2712989cb040fc69..1e87dec5318a6a59e66b6121f003634d41664994 100644 --- a/src/squeue/Makefile.in +++ b/src/squeue/Makefile.in @@ -76,7 +76,7 @@ PROGRAMS = $(bin_PROGRAMS) am_squeue_OBJECTS = squeue.$(OBJEXT) print.$(OBJEXT) opts.$(OBJEXT) \ sort.$(OBJEXT) squeue_OBJECTS = $(am_squeue_OBJECTS) -squeue_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +squeue_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o squeue_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(squeue_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -267,7 +267,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) -squeue_LDADD = $(top_builddir)/src/api/libslurmhelper.la +squeue_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = squeue.h print.h squeue_SOURCES = squeue.c print.c opts.c sort.c squeue_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/squeue/print.c b/src/squeue/print.c index 0985f5ed2d17d249bce68e559d6e7f1d082d771d..74b010636836d4032e4a630370bd9b0757df5594 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -892,6 +892,7 @@ int _print_job_min_memory(job_info_t * job, int width, bool right_justify, _print_str("MIN_MEMORY", width, right_justify, true); else { tmp_char[0] = '\0'; + 
job->job_min_memory &= (~MEM_PER_CPU); convert_num_unit((float)job->job_min_memory, min_mem, sizeof(min_mem), UNIT_NONE); strcat(tmp_char, min_mem); diff --git a/src/squeue/sort.c b/src/squeue/sort.c index 05c42f4f7e3b4eea4a1239a84348997e0d7cc7a0..2a0bce6371b830330e7fd0b2966a9e4fe8262828 100644 --- a/src/squeue/sort.c +++ b/src/squeue/sort.c @@ -455,6 +455,8 @@ static int _sort_job_by_min_memory(void *void1, void *void2) job_info_t *job1 = (job_info_t *) void1; job_info_t *job2 = (job_info_t *) void2; + job1->job_min_memory &= (~MEM_PER_CPU); + job2->job_min_memory &= (~MEM_PER_CPU); diff = job1->job_min_memory - job2->job_min_memory; if (reverse_order) diff --git a/src/squeue/squeue.c b/src/squeue/squeue.c index 12ab9db3b4c2c3a855e0dff8c94e1a121ccab519..0ab83b8f625afd27cfc64ead46fbe62b7fba3780 100644 --- a/src/squeue/squeue.c +++ b/src/squeue/squeue.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * squeue.c - Report jobs in the slurm system - * - * $Id: squeue.c 14165 2008-05-30 21:23:22Z jette $ ***************************************************************************** - * Copyright (C) 2002 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Joey Ekstrom <ekstrom1@llnl.gov>, * Morris Jette <jette1@llnl.gov>, et. al. @@ -133,28 +132,46 @@ _print_job ( void ) static job_info_msg_t * old_job_ptr = NULL, * new_job_ptr; int error_code; uint16_t show_flags = 0; + uint32_t job_id = 0; if (params.all_flag) show_flags |= SHOW_ALL; - + if (params.job_list && (list_count(params.job_list) == 1)) { + ListIterator iterator; + uint32_t *job_id_ptr; + iterator = list_iterator_create(params.job_list); + job_id_ptr = list_next(iterator); + job_id = *job_id_ptr; + list_iterator_destroy(iterator); + } + if (old_job_ptr) { - error_code = slurm_load_jobs (old_job_ptr->last_update, - &new_job_ptr, show_flags); + if (job_id) { + error_code = slurm_load_job(&new_job_ptr, job_id); + } else { + error_code = slurm_load_jobs(old_job_ptr->last_update, + &new_job_ptr, show_flags); + } if (error_code == SLURM_SUCCESS) slurm_free_job_info_msg( old_job_ptr ); else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) { error_code = SLURM_SUCCESS; new_job_ptr = old_job_ptr; } - } - else - error_code = slurm_load_jobs ((time_t) NULL, &new_job_ptr, + } else if (job_id) { + error_code = slurm_load_job(&new_job_ptr, job_id); + } else { + error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr, show_flags); + } + if (error_code) { slurm_perror ("slurm_load_jobs error"); return; } old_job_ptr = new_job_ptr; + if (job_id) + old_job_ptr->last_update = (time_t) 0; if (quiet_flag == -1) printf ("last_update_time=%ld\n", diff --git a/src/sreport/Makefile.am b/src/sreport/Makefile.am index c01f23057d469da3d3532e4d46e4e15025471fa8..fd6611ed7d4c8142ce3034177d7709171eab9681 100644 --- a/src/sreport/Makefile.am +++ b/src/sreport/Makefile.am @@ -1,6 +1,7 @@ # Makefile for sreport AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -15,12 +16,11 @@ sreport_SOURCES = \ common.c sreport_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ + $(top_builddir)/src/api/libslurm.o -ldl\ $(READLINE_LIBS) sreport_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) force: -$(convenience_libs) : force +$(sreport_LDADD) : force 
@cd `dirname $@` && $(MAKE) `basename $@` diff --git a/src/sreport/Makefile.in b/src/sreport/Makefile.in index b32e054f34d07dc963154e8319cd0b2458feee35..408e7d90bd2896802bd4f6206d1716d10f0340de 100644 --- a/src/sreport/Makefile.in +++ b/src/sreport/Makefile.in @@ -75,8 +75,7 @@ am_sreport_OBJECTS = sreport.$(OBJEXT) cluster_reports.$(OBJEXT) \ user_reports.$(OBJEXT) common.$(OBJEXT) sreport_OBJECTS = $(am_sreport_OBJECTS) am__DEPENDENCIES_1 = -sreport_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la \ +sreport_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o \ $(am__DEPENDENCIES_1) sreport_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sreport_LDFLAGS) \ @@ -266,6 +265,7 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) sreport_SOURCES = \ sreport.c sreport.h \ @@ -276,8 +276,7 @@ sreport_SOURCES = \ common.c sreport_LDADD = \ - $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la \ + $(top_builddir)/src/api/libslurm.o -ldl\ $(READLINE_LIBS) sreport_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) @@ -483,6 +482,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) @@ -566,7 +566,7 @@ uninstall-am: uninstall-binPROGRAMS force: -$(convenience_libs) : force +$(sreport_LDADD) : force @cd `dirname $@` && $(MAKE) `basename $@` # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 85759e62f3b8243b7b8e3e03e9d1f4074ca0b81a..8c39260dc7c1814a2965b064d08f5a8640e69632 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -71,9 +71,12 @@ static int _set_cond(int *start, int argc, char *argv[], int end = 0; int local_cluster_flag = all_clusters_flag; + if(!cluster_cond->cluster_list) + cluster_cond->cluster_list = list_create(slurm_destroy_char); + for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { @@ -81,24 +84,23 @@ static int _set_cond(int *start, int argc, char *argv[], } else if(!end && !strncasecmp(argv[i], "all_clusters", 1)) { local_cluster_flag = 1; continue; - } else if(!end) { - addto_char_list(cluster_cond->cluster_list, argv[i]); + } else if(!end + || !strncasecmp (argv[i], "Names", 1)) { + slurm_addto_char_list(cluster_cond->cluster_list, + argv[i]); set = 1; - } else if (strncasecmp (argv[i], "End", 1) == 0) { + } else if (!strncasecmp (argv[i], "End", 1)) { cluster_cond->usage_end = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Names", 1) == 0) { - addto_char_list(cluster_cond->cluster_list, - argv[i]+end); - set = 1; - } else if (strncasecmp (argv[i], "Start", 1) == 0) { + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Start", 1)) { cluster_cond->usage_start = parse_time(argv[i]+end); set = 1; } else { - printf(" Unknown 
condition: %s\n" + exit_code=1; + fprintf(stderr," Unknown condition: %s\n" "Use keyword set to modify value\n", argv[i]); } } @@ -123,7 +125,9 @@ static int _setup_print_fields_list(List format_list) char *object = NULL; if(!format_list || !list_count(format_list)) { - printf(" error: we need a format list to set up the print.\n"); + exit_code=1; + fprintf(stderr, " we need a format list " + "to set up the print.\n"); return SLURM_ERROR; } @@ -192,7 +196,8 @@ static int _setup_print_fields_list(List format_list) field->len = 9; field->print_routine = sreport_print_time; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); xfree(field); continue; } @@ -211,14 +216,14 @@ static List _get_cluster_list(int argc, char *argv[], uint32_t *total_time, int i=0; List cluster_list = NULL; - cluster_cond->cluster_list = list_create(slurm_destroy_char); cluster_cond->with_usage = 1; _set_cond(&i, argc, argv, cluster_cond, format_list); cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); if(!cluster_list) { - printf(" Problem with cluster query.\n"); + exit_code=1; + fprintf(stderr, " Problem with cluster query.\n"); return NULL; } @@ -270,7 +275,7 @@ extern int cluster_utilization(int argc, char *argv[]) goto end_it; if(!list_count(format_list)) - addto_char_list(format_list, "Cl,a,d,i,res,rep"); + slurm_addto_char_list(format_list, "Cl,a,d,i,res,rep"); _setup_print_fields_list(format_list); list_destroy(format_list); @@ -311,48 +316,40 @@ extern int cluster_utilization(int argc, char *argv[]) while((field = list_next(itr2))) { switch(field->type) { case PRINT_CLUSTER_NAME: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, cluster->name); break; case PRINT_CLUSTER_CPUS: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.cpu_count); break; case PRINT_CLUSTER_ACPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.alloc_secs, total_reported); break; case PRINT_CLUSTER_DCPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.down_secs, total_reported); break; case PRINT_CLUSTER_ICPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.idle_secs, total_reported); break; case PRINT_CLUSTER_RCPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.resv_secs, total_reported); break; case PRINT_CLUSTER_OCPU: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_acct.over_secs, total_reported); break; case PRINT_CLUSTER_TOTAL: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, total_reported, local_total_time); break; diff --git a/src/sreport/common.c b/src/sreport/common.c index 38fcfac371cdc3743fc80131b6ab88b155a7178c..9df97591b305d113835d4b41d8be46034fb73660 100644 --- a/src/sreport/common.c +++ b/src/sreport/common.c @@ -39,68 +39,47 @@ #include "sreport.h" -extern void sreport_print_time(type_t type, print_field_t *field, +extern void sreport_print_time(print_field_t *field, uint64_t value, uint64_t total_time) { if(!total_time) total_time = 1; - switch(type) { - case SLURM_PRINT_HEADLINE: + /* (value == unset) || (value == cleared) */ + if((value == NO_VAL) || (value == INFINITE)) { if(print_fields_parsable_print) - printf("%s|", field->name); - else - printf("%-*.*s ", field->len, field->len, field->name); - break; - case 
SLURM_PRINT_UNDERSCORE: - if(!print_fields_parsable_print) - printf("%-*.*s ", field->len, field->len, - "---------------------------------------"); - break; - case SLURM_PRINT_VALUE: - /* (value == unset) || (value == cleared) */ - if((value == NO_VAL) || (value == INFINITE)) { - if(print_fields_parsable_print) - printf("|"); - else - printf("%-*s ", field->len, " "); - } else { - char *output = NULL; - double percent = (double)value; - - switch(time_format) { - case SREPORT_TIME_SECS: - output = xstrdup_printf("%llu", value); - break; - case SREPORT_TIME_PERCENT: - percent /= total_time; - percent *= 100; - output = xstrdup_printf("%.2lf%%", percent); - break; - case SREPORT_TIME_SECS_PER: - percent /= total_time; - percent *= 100; - output = xstrdup_printf("%llu(%.2lf%%)", - value, percent); - break; - default: - output = xstrdup_printf("%llu", value); - break; - } - - if(print_fields_parsable_print) - printf("%s|", output); - else - printf("%*s ", field->len, output); - xfree(output); + printf("|"); + else + printf("%-*s ", field->len, " "); + } else { + char *output = NULL; + double percent = (double)value; + + switch(time_format) { + case SREPORT_TIME_SECS: + output = xstrdup_printf("%llu", value); + break; + case SREPORT_TIME_PERCENT: + percent /= total_time; + percent *= 100; + output = xstrdup_printf("%.2lf%%", percent); + break; + case SREPORT_TIME_SECS_PER: + percent /= total_time; + percent *= 100; + output = xstrdup_printf("%llu(%.2lf%%)", + value, percent); + break; + default: + output = xstrdup_printf("%llu", value); + break; } - break; - default: + if(print_fields_parsable_print) - printf("%s|", "n/a"); + printf("%s|", output); else - printf("%-*.*s ", field->len, field->len, "n/a"); - break; + printf("%*s ", field->len, output); + xfree(output); } } @@ -156,9 +135,15 @@ extern void addto_char_list(List char_list, char *names) { int i=0, start=0; char *name = NULL, *tmp_char = NULL; - ListIterator itr = list_iterator_create(char_list); + ListIterator itr = NULL; + + if(!char_list) { + error("No list was given to fill in"); + return; + } - if(names && char_list) { + itr = list_iterator_create(char_list); + if(names) { if (names[i] == '\"' || names[i] == '\'') i++; start = i; diff --git a/src/sreport/job_reports.c b/src/sreport/job_reports.c index ff249fac08652a8b8d9a4d96a8a10ad2740ef8e4..654ca7b02fa4c480f63908940f5b1297aa0a069b 100644 --- a/src/sreport/job_reports.c +++ b/src/sreport/job_reports.c @@ -112,6 +112,109 @@ static void _destroy_cluster_grouping(void *object) } } +/* returns number of objects added to list */ +extern int _addto_uid_char_list(List char_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *tmp_char = NULL; + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!char_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(char_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + //info("got %s %d", name, i-start); + if (!isdigit((int) *name)) { + struct passwd *pwd; + if (!(pwd=getpwnam(name))) { + fprintf(stderr, + "Invalid user " + "id: %s\n", + name); + exit(1); + } + xfree(name); + name = 
xstrdup_printf( + "%d", pwd->pw_uid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + list_iterator_reset(itr); + } + i++; + start = i; + if(!names[i]) { + info("There is a problem with " + "your request. It appears you " + "have spaces inside your list."); + break; + } + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + if (!isdigit((int) *name)) { + struct passwd *pwd; + if (!(pwd=getpwnam(name))) { + fprintf(stderr, + "Invalid user id: %s\n", + name); + exit(1); + } + xfree(name); + name = xstrdup_printf("%d", pwd->pw_uid); + } + + while((tmp_char = list_next(itr))) { + if(!strcasecmp(tmp_char, name)) + break; + } + + if(!tmp_char) { + list_append(char_list, name); + count++; + } else + xfree(name); + } + } + list_iterator_destroy(itr); + return count; +} + static int _set_cond(int *start, int argc, char *argv[], acct_job_cond_t *job_cond, List format_list, List grouping_list) @@ -121,9 +224,12 @@ static int _set_cond(int *start, int argc, char *argv[], int end = 0; int local_cluster_flag = all_clusters_flag; + if(!job_cond->cluster_list) + job_cond->cluster_list = list_create(slurm_destroy_char); + for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { @@ -131,38 +237,50 @@ static int _set_cond(int *start, int argc, char *argv[], } else if(!end && !strncasecmp(argv[i], "all_clusters", 1)) { local_cluster_flag = 1; continue; - } else if(!end) { - addto_char_list(job_cond->cluster_list, argv[i]); + } else if(!end || !strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(job_cond->cluster_list, argv[i]); set = 1; - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(job_cond->acct_list, + } else if (!strncasecmp (argv[i], "Accounts", 2)) { + if(!job_cond->acct_list) + job_cond->acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->acct_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Associations", 2) == 0) { - addto_char_list(job_cond->associd_list, + } else if (!strncasecmp (argv[i], "Associations", 2)) { + if(!job_cond->associd_list) + job_cond->associd_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->associd_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(job_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(job_cond->cluster_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "End", 1) == 0) { + } else if (!strncasecmp (argv[i], "End", 1)) { job_cond->usage_end = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Gid", 2) == 0) { - addto_char_list(job_cond->groupid_list, + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Gid", 2)) { + if(!job_cond->groupid_list) + job_cond->groupid_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->groupid_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "grouping", 2) == 0) { + } else if (!strncasecmp (argv[i], "grouping", 
2)) { if(grouping_list) - addto_char_list(grouping_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Jobs", 1) == 0) { + slurm_addto_char_list(grouping_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Jobs", 1)) { char *end_char = NULL, *start_char = argv[i]+end; jobacct_selected_step_t *selected_step = NULL; char *dot = NULL; + if(!job_cond->step_list) + job_cond->step_list = + list_create(slurm_destroy_char); while ((end_char = strstr(start_char, ",")) && start_char) { @@ -178,33 +296,36 @@ static int _set_cond(int *start, int argc, char *argv[], dot = strstr(start_char, "."); if (dot == NULL) { debug2("No jobstep requested"); - selected_step->step = NULL; - selected_step->stepid = - (uint32_t)NO_VAL; + selected_step->stepid = NO_VAL; } else { *dot++ = 0; - selected_step->step = xstrdup(dot); selected_step->stepid = atoi(dot); } - selected_step->job = xstrdup(start_char); selected_step->jobid = atoi(start_char); start_char = end_char + 1; } set = 1; - } else if (strncasecmp (argv[i], "Partitions", 1) == 0) { - addto_char_list(job_cond->partition_list, + } else if (!strncasecmp (argv[i], "Partitions", 1)) { + if(!job_cond->partition_list) + job_cond->partition_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->partition_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Start", 1) == 0) { + } else if (!strncasecmp (argv[i], "Start", 1)) { job_cond->usage_start = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(job_cond->user_list, - argv[i]+end); + } else if (!strncasecmp (argv[i], "Users", 1)) { + if(!job_cond->userid_list) + job_cond->userid_list = + list_create(slurm_destroy_char); + _addto_uid_char_list(job_cond->userid_list, + argv[i]+end); set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" "Use keyword set to modify value\n", argv[i]); } } @@ -230,7 +351,9 @@ static int _setup_print_fields_list(List format_list) char *object = NULL; if(!format_list || !list_count(format_list)) { - printf(" error: we need a format list to set up the print.\n"); + exit_code=1; + fprintf(stderr, + " We need a format list to set up the print.\n"); return SLURM_ERROR; } @@ -276,7 +399,8 @@ static int _setup_print_fields_list(List format_list) field->len = 9; field->print_routine = print_fields_str; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); xfree(field); continue; } @@ -296,8 +420,9 @@ static int _setup_grouping_print_fields_list(List grouping_list) uint32_t size = 0; if(!grouping_list || !list_count(grouping_list)) { - printf(" error: we need a grouping list to " - "set up the print.\n"); + exit_code=1; + fprintf(stderr, " We need a grouping list to " + "set up the print.\n"); return SLURM_ERROR; } @@ -367,19 +492,12 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) print_fields_list = list_create(destroy_print_field); - job_cond->acct_list = list_create(slurm_destroy_char); - job_cond->associd_list = list_create(slurm_destroy_char); - job_cond->cluster_list = list_create(slurm_destroy_char); - job_cond->groupid_list = list_create(slurm_destroy_char); - job_cond->partition_list = list_create(slurm_destroy_char); - job_cond->step_list = list_create(slurm_destroy_char); - _set_cond(&i, argc, argv, job_cond, NULL, grouping_list); - addto_char_list(format_list, "Cl,a"); + slurm_addto_char_list(format_list, "Cl,a"); if(!list_count(grouping_list)) 
- addto_char_list(grouping_list, "50,250,500,1000"); + slurm_addto_char_list(grouping_list, "50,250,500,1000"); _setup_print_fields_list(format_list); list_destroy(format_list); @@ -388,7 +506,8 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) job_list = jobacct_storage_g_get_jobs_cond(db_conn, job_cond); if(!job_list) { - printf(" Problem with job query.\n"); + exit_code=1; + fprintf(stderr, " Problem with job query.\n"); goto end_it; } @@ -599,13 +718,11 @@ no_assocs: switch(field->type) { case PRINT_JOB_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, cluster_group->cluster); break; case PRINT_JOB_ACCOUNT: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, acct_group->acct); break; default: @@ -619,7 +736,6 @@ no_assocs: switch(field->type) { case PRINT_JOB_SIZE: field->print_routine( - SLURM_PRINT_VALUE, field, local_group->cpu_secs, acct_group->cpu_secs); @@ -630,8 +746,7 @@ no_assocs: } list_iterator_reset(itr2); list_iterator_destroy(local_itr); - total_field.print_routine(SLURM_PRINT_VALUE, - &total_field, + total_field.print_routine(&total_field, acct_group->cpu_secs, cluster_group->cpu_secs); diff --git a/src/sreport/sreport.h b/src/sreport/sreport.h index 2633a6cf378dbeb978ca51f3ea89d8c1665413c0..d8f25640da3666761d2f6fb60b6267374fa90273 100644 --- a/src/sreport/sreport.h +++ b/src/sreport/sreport.h @@ -102,11 +102,10 @@ extern void *db_conn; extern uint32_t my_uid; extern int all_clusters_flag; -extern void sreport_print_time(type_t type, print_field_t *field, +extern void sreport_print_time(print_field_t *field, uint64_t value, uint64_t total_time); extern int parse_option_end(char *option); extern char *strip_quotes(char *option, int *increased); -extern void addto_char_list(List char_list, char *names); extern int set_start_end_time(time_t *start, time_t *end); #endif /* HAVE_SREPORT_H */ diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index fa1ff534029e22398f1e74f0b342d5dbe78ea726..a1496a48fe5e5b897fede185dc339829edc4cac9 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -129,9 +129,6 @@ static int _set_cond(int *start, int argc, char *argv[], return SLURM_ERROR; } - if(!user_cond->user_list) - user_cond->user_list = list_create(slurm_destroy_char); - user_cond->with_deleted = 1; user_cond->with_assocs = 1; if(!user_cond->assoc_cond) { @@ -140,14 +137,13 @@ static int _set_cond(int *start, int argc, char *argv[], user_cond->assoc_cond->with_usage = 1; } assoc_cond = user_cond->assoc_cond; - if(!assoc_cond->acct_list) - assoc_cond->acct_list = list_create(slurm_destroy_char); + if(!assoc_cond->cluster_list) assoc_cond->cluster_list = list_create(slurm_destroy_char); for (i=(*start); i<argc; i++) { end = parse_option_end(argv[i]); - if (strncasecmp (argv[i], "Set", 3) == 0) { + if (!strncasecmp (argv[i], "Set", 3)) { i--; break; } else if(!end && !strncasecmp(argv[i], "where", 5)) { @@ -157,32 +153,37 @@ static int _set_cond(int *start, int argc, char *argv[], continue; } else if (!end && !strncasecmp(argv[i], "group", 1)) { group_accts = 1; - } else if(!end) { - addto_char_list(user_cond->user_list, argv[i]); + } else if(!end + || !strncasecmp (argv[i], "Users", 1)) { + if(!assoc_cond->user_list) + assoc_cond->user_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(assoc_cond->user_list, + argv[i]); set = 1; - } else if (strncasecmp (argv[i], "Accounts", 2) == 0) { - addto_char_list(assoc_cond->acct_list, + } else if (!strncasecmp (argv[i], "Accounts", 2)) 
{ + if(!assoc_cond->acct_list) + assoc_cond->acct_list = + list_create(slurm_destroy_char); + slurm_addto_char_list(assoc_cond->acct_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Clusters", 1) == 0) { - addto_char_list(assoc_cond->cluster_list, + } else if (!strncasecmp (argv[i], "Clusters", 1)) { + slurm_addto_char_list(assoc_cond->cluster_list, argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "End", 1) == 0) { + } else if (!strncasecmp (argv[i], "End", 1)) { assoc_cond->usage_end = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Format", 1) == 0) { + } else if (!strncasecmp (argv[i], "Format", 1)) { if(format_list) - addto_char_list(format_list, argv[i]+end); - } else if (strncasecmp (argv[i], "Start", 1) == 0) { + slurm_addto_char_list(format_list, argv[i]+end); + } else if (!strncasecmp (argv[i], "Start", 1)) { assoc_cond->usage_start = parse_time(argv[i]+end); set = 1; - } else if (strncasecmp (argv[i], "Users", 1) == 0) { - addto_char_list(user_cond->user_list, - argv[i]+end); - set = 1; } else { - printf(" Unknown condition: %s\n" + exit_code=1; + fprintf(stderr, " Unknown condition: %s\n" "Use keyword set to modify value\n", argv[i]); } } @@ -207,7 +208,9 @@ static int _setup_print_fields_list(List format_list) char *object = NULL; if(!format_list || !list_count(format_list)) { - printf(" error: we need a format list to set up the print.\n"); + exit_code=1; + fprintf(stderr, + " We need a format list to set up the print.\n"); return SLURM_ERROR; } @@ -246,7 +249,8 @@ static int _setup_print_fields_list(List format_list) field->len = 10; field->print_routine = sreport_print_time; } else { - printf("Unknown field '%s'\n", object); + exit_code=1; + fprintf(stderr, " Unknown field '%s'\n", object); xfree(field); continue; } @@ -284,14 +288,15 @@ extern int user_top(int argc, char *argv[]) _set_cond(&i, argc, argv, user_cond, format_list); if(!list_count(format_list)) - addto_char_list(format_list, "Cl,L,P,A,U"); + slurm_addto_char_list(format_list, "Cl,L,P,A,U"); _setup_print_fields_list(format_list); list_destroy(format_list); user_list = acct_storage_g_get_users(db_conn, user_cond); if(!user_list) { - printf(" Problem with user query.\n"); + exit_code=1; + fprintf(stderr, " Problem with user query.\n"); goto end_it; } @@ -443,20 +448,17 @@ extern int user_top(int argc, char *argv[]) } list_iterator_destroy(itr3); field->print_routine( - SLURM_PRINT_VALUE, field, tmp_char); xfree(tmp_char); break; case PRINT_USER_CLUSTER: field->print_routine( - SLURM_PRINT_VALUE, field, local_cluster->name); break; case PRINT_USER_LOGIN: - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, local_user->name); break; case PRINT_USER_PROPER: @@ -468,13 +470,11 @@ extern int user_top(int argc, char *argv[]) tmp_char = pwd->pw_gecos; } - field->print_routine(SLURM_PRINT_VALUE, - field, + field->print_routine(field, tmp_char); break; case PRINT_USER_USED: field->print_routine( - SLURM_PRINT_VALUE, field, local_user->cpu_secs, local_cluster->cpu_secs); diff --git a/src/srun/Makefile.am b/src/srun/Makefile.am index 80a846909da642901012de95128b9d9dfdffee55..5809c715abf848bd598de1b8fc6221711e742a3c 100644 --- a/src/srun/Makefile.am +++ b/src/srun/Makefile.am @@ -1,6 +1,7 @@ # AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) @@ -22,7 +23,8 @@ srun_SOURCES = \ multi_prog.c multi_prog.h \ srun.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = \ + 
$(top_builddir)/src/api/libslurm.o -ldl srun_LDADD = \ $(convenience_libs) diff --git a/src/srun/Makefile.in b/src/srun/Makefile.in index 4485b7d17062a5582aee3cfdd870bd36d23e761c..0f9dbabe6de5537f29b7dda679501cd0f98216bd 100644 --- a/src/srun/Makefile.in +++ b/src/srun/Makefile.in @@ -75,7 +75,8 @@ am_srun_OBJECTS = srun.$(OBJEXT) opt.$(OBJEXT) srun_job.$(OBJEXT) \ allocate.$(OBJEXT) core-format.$(OBJEXT) multi_prog.$(OBJEXT) \ srun.wrapper.$(OBJEXT) srun_OBJECTS = $(am_srun_OBJECTS) -srun_DEPENDENCIES = $(convenience_libs) +am__DEPENDENCIES_1 = $(top_builddir)/src/api/libslurm.o +srun_DEPENDENCIES = $(am__DEPENDENCIES_1) srun_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(srun_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -264,6 +265,7 @@ target_vendor = @target_vendor@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign +CLEANFILES = core.* INCLUDES = -I$(top_srcdir) srun_SOURCES = \ srun.c srun.h \ @@ -281,7 +283,9 @@ srun_SOURCES = \ multi_prog.c multi_prog.h \ srun.wrapper.c -convenience_libs = $(top_builddir)/src/api/libslurmhelper.la +convenience_libs = \ + $(top_builddir)/src/api/libslurm.o -ldl + srun_LDADD = \ $(convenience_libs) @@ -492,6 +496,7 @@ install-strip: mostlyclean-generic: clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) distclean-generic: -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 130f55e1a1d83a7bdd17bc57001da2498e59c4f1..505f6b697c8beafac521edebd43534e250473e2e 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.c - srun functions for managing node allocations - * $Id: allocate.c 14453 2008-07-08 20:26:18Z da $ + * $Id: allocate.c 14570 2008-07-18 22:06:26Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -80,7 +80,6 @@ static uint32_t pending_job_id = 0; * Static Prototypes */ static void _set_pending_job_id(uint32_t job_id); -static void _ignore_signal(int signo); static void _exit_on_signal(int signo); static void _signal_while_allocating(int signo); static void _intr_handler(int signo); @@ -101,11 +100,6 @@ static void _signal_while_allocating(int signo) } } -static void _ignore_signal(int signo) -{ - /* do nothing */ -} - static void _exit_on_signal(int signo) { exit_flag = true; @@ -114,7 +108,10 @@ static void _exit_on_signal(int signo) /* This typically signifies the job was cancelled by scancel */ static void _job_complete_handler(srun_job_complete_msg_t *msg) { - info("Force Terminated job"); + if((int)msg->step_id >= 0) + info("Force Terminated job %u.%u", msg->job_id, msg->step_id); + else + info("Force Terminated job %u", msg->job_id); } /* @@ -246,18 +243,24 @@ allocate_nodes(void) } xsignal(SIGHUP, _exit_on_signal); - xsignal(SIGINT, _ignore_signal); - xsignal(SIGQUIT, _ignore_signal); - xsignal(SIGPIPE, _ignore_signal); - xsignal(SIGTERM, _ignore_signal); - xsignal(SIGUSR1, _ignore_signal); - xsignal(SIGUSR2, _ignore_signal); + xsignal(SIGINT, ignore_signal); + xsignal(SIGQUIT, ignore_signal); + xsignal(SIGPIPE, ignore_signal); + xsignal(SIGTERM, ignore_signal); + xsignal(SIGUSR1, ignore_signal); + xsignal(SIGUSR2, ignore_signal); job_desc_msg_destroy(j); return resp; } +void +ignore_signal(int signo) +{ + /* do nothing */ +} + int cleanup_allocation() { @@ -450,7 +453,9 @@ job_desc_msg_create_from_opts () if (opt.job_min_threads != NO_VAL) j->job_min_threads = opt.job_min_threads; if (opt.job_min_memory != NO_VAL) - j->job_min_memory = opt.job_min_memory; + j->job_min_memory = opt.job_min_memory; + else if (opt.mem_per_cpu != NO_VAL) + j->job_min_memory = opt.mem_per_cpu | MEM_PER_CPU; if (opt.job_min_tmp_disk != NO_VAL) j->job_min_tmp_disk = opt.job_min_tmp_disk; if (opt.overcommit) { @@ -511,8 +516,6 @@ create_job_step(srun_job_t *job) : (opt.nprocs*opt.cpus_per_task); job->ctx_params.relative = (uint16_t)opt.relative; - if (opt.task_mem != NO_VAL) - job->ctx_params.mem_per_task = (uint16_t)opt.task_mem; job->ctx_params.ckpt_interval = (uint16_t)opt.ckpt_interval; job->ctx_params.ckpt_path = opt.ckpt_path; job->ctx_params.exclusive = (uint16_t)opt.exclusive; diff --git a/src/srun/allocate.h b/src/srun/allocate.h index 91474c653ae69492f005fe4f02b069d5a9c12bff..85672c5528384a10d90558f0541a10dbab120aa2 100644 --- a/src/srun/allocate.h +++ b/src/srun/allocate.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.h - node allocation functions for srun - * $Id: allocate.h 13672 2008-03-19 23:10:58Z jette $ + * $Id: allocate.h 14570 2008-07-18 22:06:26Z da $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -60,6 +60,9 @@ slurmctld_comm_addr_t slurmctld_comm_addr; */ resource_allocation_response_msg_t * allocate_nodes(void); +/* dummy function to handle all signals we want to ignore */ +void ignore_signal(int signo); + /* clean up the msg thread polling for information from the controller */ int cleanup_allocation(); diff --git a/src/srun/opt.c b/src/srun/opt.c index 625c3ad469346730325c632743e461a5ab7fb5ec..94bfff45fc2b4022f6ac3b8ae4f0fd245773d87c 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -1,8 +1,8 @@ /*****************************************************************************\ * opt.c - options processing for srun - * $Id: opt.c 14420 2008-07-02 19:52:49Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona1@llnl.gov>, et. al. * LLNL-CODE-402394. @@ -153,7 +153,7 @@ #define LONG_OPT_NTASKSPERNODE 0x136 #define LONG_OPT_NTASKSPERSOCKET 0x137 #define LONG_OPT_NTASKSPERCORE 0x138 -#define LONG_OPT_TASK_MEM 0x13a +#define LONG_OPT_MEM_PER_CPU 0x13a #define LONG_OPT_HINT 0x13b #define LONG_OPT_BLRTS_IMAGE 0x140 #define LONG_OPT_LINUX_IMAGE 0x141 @@ -656,7 +656,7 @@ static void _opt_default() opt.job_min_cores = NO_VAL; opt.job_min_threads = NO_VAL; opt.job_min_memory = NO_VAL; - opt.task_mem = NO_VAL; + opt.mem_per_cpu = NO_VAL; opt.job_min_tmp_disk= NO_VAL; opt.hold = false; @@ -777,7 +777,6 @@ env_vars_t env_vars[] = { {"SLURM_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, {"SLURM_OPEN_MODE", OPT_OPEN_MODE, NULL, NULL }, {"SLURM_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, -{"SLURM_TASK_MEM", OPT_INT, &opt.task_mem, NULL }, {"SLURM_NETWORK", OPT_STRING, &opt.network, NULL }, {NULL, 0, NULL, NULL} }; @@ -991,8 +990,9 @@ static void set_options(const int argc, char **argv) {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, {"mem", required_argument, 0, LONG_OPT_MEM}, - {"job-mem", required_argument, 0, LONG_OPT_TASK_MEM}, - {"task-mem", required_argument, 0, LONG_OPT_TASK_MEM}, + {"job-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"task-mem", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"hint", required_argument, 0, LONG_OPT_HINT}, {"mpi", required_argument, 0, LONG_OPT_MPI}, {"tmp", required_argument, 0, LONG_OPT_TMP}, @@ -1314,9 +1314,9 @@ static void set_options(const int argc, char **argv) exit(1); } break; - case LONG_OPT_TASK_MEM: - opt.task_mem = (int) str_to_bytes(optarg); - if (opt.task_mem < 0) { + case LONG_OPT_MEM_PER_CPU: + opt.mem_per_cpu = (int) str_to_bytes(optarg); + if (opt.mem_per_cpu < 0) { error("invalid memory constraint %s", optarg); exit(1); @@ -1626,15 +1626,11 @@ static void _opt_args(int argc, char **argv) set_options(argc, argv); - /* When CR with memory as a CR is enabled we need to assign - * adequate value or check the value to opt.mem */ - if ((opt.job_min_memory >= -1) && (opt.task_mem > 0)) { - if (opt.job_min_memory == -1) { - opt.job_min_memory = opt.task_mem; - } else if (opt.job_min_memory < opt.task_mem) { - info("mem < task-mem - resizing mem to be equal " - "to task-mem"); - opt.job_min_memory = opt.task_mem; + if ((opt.job_min_memory > -1) && (opt.mem_per_cpu > -1)) { + 
if (opt.job_min_memory < opt.mem_per_cpu) { + info("mem < mem-per-cpu - resizing mem to be equal " + "to mem-per-cpu"); + opt.job_min_memory = opt.mem_per_cpu; } } @@ -2030,19 +2026,6 @@ static bool _opt_verify(void) xfree(sched_name); } - if (opt.task_mem > 0) { - uint32_t max_mem = slurm_get_max_mem_per_task(); - if (max_mem && (opt.task_mem > max_mem)) { - info("WARNING: Reducing --task-mem to system maximum " - "of %u MB", max_mem); - opt.task_mem = max_mem; - } - } else { - uint32_t max_mem = slurm_get_def_mem_per_task(); - if (max_mem) - opt.task_mem = max_mem; - } - return verified; } @@ -2069,8 +2052,8 @@ static char *print_constraints() if (opt.job_min_memory > 0) xstrfmtcat(buf, "mem=%dM ", opt.job_min_memory); - if (opt.task_mem > 0) - xstrfmtcat(buf, "task-mem=%dM ", opt.task_mem); + if (opt.mem_per_cpu > 0) + xstrfmtcat(buf, "mem-per-cpu=%dM ", opt.mem_per_cpu); if (opt.job_min_tmp_disk > 0) xstrfmtcat(buf, "tmp=%ld ", opt.job_min_tmp_disk); @@ -2223,7 +2206,7 @@ static void _usage(void) " [--kill-on-bad-exit] [--propagate[=rlimits] [--comment=name]\n" " [--cpu_bind=...] [--mem_bind=...] [--network=type]\n" " [--ntasks-per-node=n] [--ntasks-per-socket=n]\n" -" [--ntasks-per-core=n]\n" +" [--ntasks-per-core=n] [--mem-per-cpu=MB]\n" #ifdef HAVE_BG /* Blue gene specific options */ " [--geometry=XxYxZ] [--conn-type=type] [--no-rotate] [--reboot]\n" " [--blrts-image=path] [--linux-image=path]\n" @@ -2321,8 +2304,8 @@ static void _help(void) " --exclusive allocate nodes in exclusive mode when\n" " cpu consumable resource is enabled\n" " or don't share CPUs for job steps\n" -" --task-mem=MB maximum amount of real memory per task\n" -" required by the job.\n" +" --mem-per-cpu=MB maximum amount of real memory per allocated\n" +" CPU required by the job.\n" " --mem >= --job-mem if --mem is specified.\n" "\n" "Affinity/Multi-core options: (when the task/affinity plugin is enabled)\n" diff --git a/src/srun/opt.h b/src/srun/opt.h index 54cdac36c154b29e9e091c1074de43355bb319fa..200042374ed8a28f30970ecbb9cf52caad2b90fc 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing - * $Id: opt.h 13771 2008-04-02 20:03:47Z jette $ + * $Id: opt.h 14469 2008-07-09 18:15:23Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -170,7 +170,7 @@ typedef struct srun_options { int32_t job_min_cores; /* --mincores=n */ int32_t job_min_threads;/* --minthreads=n */ int32_t job_min_memory; /* --mem=n */ - int32_t task_mem; /* --task-mem=n */ + int32_t mem_per_cpu; /* --mem-per-cpu=n */ long job_min_tmp_disk; /* --tmp=n */ char *constraints; /* --constraints=, -C constraint*/ bool contiguous; /* --contiguous */ diff --git a/src/srun/srun.c b/src/srun/srun.c index c994af6b4314f8bb083087f578bccee73feb740a..b02333867c9e3741a9b94e88d2985780e8b20a3f 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -120,6 +120,7 @@ static int _become_user (void); static int _call_spank_local_user (srun_job_t *job); static void _define_symbols(void); static void _handle_intr(); +static void _handle_pipe(int signo); static void _handle_signal(int signo); static void _print_job_information(resource_allocation_response_msg_t *resp); static void _pty_restore(void); @@ -229,6 +230,7 @@ int srun(int ac, char **av) exit(1); } } else if ((resp = existing_allocation())) { + job_id = resp->job_id; if (opt.alloc_nodelist == NULL) opt.alloc_nodelist = xstrdup(resp->node_list); @@ -409,7 +411,8 @@ int srun(int ac, char **av) if (opt.debugger_test) mpir_dump_proctable(); } else { - info("Job step aborted before step completely launched."); + info("Job step %u.%u aborted before step completely launched.", + job->jobid, job->stepid); } slurm_step_launch_wait_finish(job->step_ctx); @@ -1061,7 +1064,8 @@ static void _handle_intr() return; } - info("sending Ctrl-C to job"); + info("sending Ctrl-C to job %u.%u", + job->jobid, job->stepid); last_intr_sent = time(NULL); slurm_step_launch_fwd_signal(job->step_ctx, SIGINT); @@ -1072,6 +1076,16 @@ static void _handle_intr() } } +static void _handle_pipe(int signo) +{ + static int ending = 0; + + if(ending) + return; + ending = 1; + slurm_step_launch_abort(job->step_ctx); +} + static void _handle_signal(int signo) { debug2("got signal %d", signo); @@ -1085,7 +1099,11 @@ static void _handle_signal(int signo) /* continue with slurm_step_launch_abort */ case SIGTERM: case SIGHUP: - job_force_termination(job); + /* No need to call job_force_termination here since we + * are ending the job now and we don't need to update the + * state. 
+ */ + info ("forcing job termination"); slurm_step_launch_abort(job->step_ctx); break; /* case SIGTSTP: */ @@ -1104,7 +1122,7 @@ static int _setup_signals() { int sigarray[] = { SIGINT, SIGQUIT, /*SIGTSTP,*/ SIGCONT, SIGTERM, - SIGALRM, SIGUSR1, SIGUSR2, SIGPIPE, 0 + SIGALRM, SIGUSR1, SIGUSR2, /*SIGPIPE,*/ 0 }; int rc = SLURM_SUCCESS, i=0, signo; @@ -1113,6 +1131,10 @@ static int _setup_signals() while ((signo = sigarray[i++])) xsignal(signo, _handle_signal); + /* special case for SIGPIPE since we don't want to print stuff + * and get into a locked up state + */ + xsignal(SIGPIPE, _handle_pipe); return rc; } diff --git a/src/sstat/Makefile.am b/src/sstat/Makefile.am index cdb0a0c6ead9946f76e1d907fdf99fdb2ecf9bed..41cf9df325d1bf1802eb808522a64717f95e1fe8 100644 --- a/src/sstat/Makefile.am +++ b/src/sstat/Makefile.am @@ -6,8 +6,8 @@ INCLUDES = -I$(top_srcdir) bin_PROGRAMS = sstat -sstat_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la +sstat_LDADD = $(top_builddir)/src/api/libslurm.o -ldl + noinst_HEADERS = sstat.c sstat_SOURCES = sstat.c process.c print.c options.c diff --git a/src/sstat/Makefile.in b/src/sstat/Makefile.in index d752d24b74dcb0f1d5e09ae11742bea686c7652e..c06d4277dace34bd3aec64756a7619e4981e2df4 100644 --- a/src/sstat/Makefile.in +++ b/src/sstat/Makefile.in @@ -75,8 +75,7 @@ PROGRAMS = $(bin_PROGRAMS) am_sstat_OBJECTS = sstat.$(OBJEXT) process.$(OBJEXT) print.$(OBJEXT) \ options.$(OBJEXT) sstat_OBJECTS = $(am_sstat_OBJECTS) -sstat_DEPENDENCIES = $(top_builddir)/src/common/libcommon.o \ - $(top_builddir)/src/api/libslurmhelper.la +sstat_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o sstat_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(sstat_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -267,9 +266,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) -sstat_LDADD = $(top_builddir)/src/common/libcommon.o -ldl \ - $(top_builddir)/src/api/libslurmhelper.la - +sstat_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sstat.c sstat_SOURCES = sstat.c process.c print.c options.c sstat_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/sstat/options.c b/src/sstat/options.c index a1aafaa40e9144a7dffa12e6288d026783cc6fb9..13760437233a551ff0a536bb839aa69c7695e52b 100644 --- a/src/sstat/options.c +++ b/src/sstat/options.c @@ -119,30 +119,120 @@ void _do_help(void) void _init_params() { - params.opt_cluster = NULL; /* --cluster */ - params.opt_completion = 0; /* --completion */ - params.opt_dump = 0; /* --dump */ - params.opt_dup = -1; /* --duplicates; +1 = explicitly set */ - params.opt_fdump = 0; /* --formattted_dump */ - params.opt_stat = 0; /* --stat */ - params.opt_gid = -1; /* --gid (-1=wildcard, 0=root) */ - params.opt_header = 1; /* can only be cleared */ - params.opt_help = 0; /* --help */ - params.opt_long = 0; /* --long */ - params.opt_lowmem = 0; /* --low_memory */ - params.opt_purge = 0; /* --purge */ - params.opt_total = 0; /* --total */ - params.opt_uid = -1; /* --uid (-1=wildcard, 0=root) */ - params.opt_uid_set = 0; - params.opt_verbose = 0; /* --verbose */ - params.opt_expire_timespec = NULL; /* --expire= */ - params.opt_field_list = NULL; /* --fields= */ - params.opt_filein = NULL; /* --file */ - params.opt_job_list = NULL; /* --jobs */ - params.opt_partition_list = NULL;/* --partitions */ - params.opt_state_list = NULL; /* --states */ + memset(¶ms, 0, sizeof(sacct_parameters_t)); } +/* 
returns number of objects added to list */ +static int _addto_job_list(List job_list, char *names) +{ + int i=0, start=0; + char *name = NULL, *dot = NULL; + jobacct_selected_step_t *selected_step = NULL; + jobacct_selected_step_t *curr_step = NULL; + + ListIterator itr = NULL; + char quote_c = '\0'; + int quote = 0; + int count = 0; + + if(!job_list) { + error("No list was given to fill in"); + return 0; + } + + itr = list_iterator_create(job_list); + if(names) { + if (names[i] == '\"' || names[i] == '\'') { + quote_c = names[i]; + quote = 1; + i++; + } + start = i; + while(names[i]) { + //info("got %d - %d = %d", i, start, i-start); + if(quote && names[i] == quote_c) + break; + else if (names[i] == '\"' || names[i] == '\'') + names[i] = '`'; + else if(names[i] == ',') { + if((i-start) > 0) { + char *dot = NULL; + name = xmalloc((i-start+1)); + memcpy(name, names+start, (i-start)); + + selected_step = xmalloc( + sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = + atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid + == selected_step->jobid) + && (curr_step->stepid + == selected_step-> + stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, + selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + list_iterator_reset(itr); + } + i++; + start = i; + } + i++; + } + if((i-start) > 0) { + name = xmalloc((i-start)+1); + memcpy(name, names+start, (i-start)); + + selected_step = + xmalloc(sizeof(jobacct_selected_step_t)); + dot = strstr(name, "."); + if (dot == NULL) { + debug2("No jobstep requested"); + selected_step->stepid = NO_VAL; + } else { + *dot++ = 0; + selected_step->stepid = atoi(dot); + } + selected_step->jobid = atoi(name); + xfree(name); + + while((curr_step = list_next(itr))) { + if((curr_step->jobid == selected_step->jobid) + && (curr_step->stepid + == selected_step->stepid)) + break; + } + + if(!curr_step) { + list_append(job_list, selected_step); + count++; + } else + destroy_jobacct_selected_step( + selected_step); + } + } + list_iterator_destroy(itr); + return count; +} + int decode_state_char(char *state) { if (!strcasecmp(state, "p")) @@ -165,14 +255,13 @@ int decode_state_char(char *state) return -1; // unknown } -void parse_command_line(int argc, char **argv, List selected_steps) +void parse_command_line(int argc, char **argv) { extern int optind; int c, i, optionIndex = 0; char *end = NULL, *start = NULL; jobacct_selected_step_t *selected_step = NULL; ListIterator itr = NULL; - char *dot = NULL; log_options_t logopt = LOG_OPTS_STDERR_ONLY; static struct option long_options[] = { @@ -181,7 +270,7 @@ void parse_command_line(int argc, char **argv, List selected_steps) {"help", 0, ¶ms.opt_help, 1}, {"help-fields", 0, ¶ms.opt_help, 2}, {"jobs", 1, 0, 'j'}, - {"noheader", 0, ¶ms.opt_header, 0}, + {"noheader", 0, ¶ms.opt_noheader, 1}, {"usage", 0, ¶ms.opt_help, 3}, {"verbose", 0, 0, 'v'}, {"version", 0, 0, 'V'}, @@ -198,14 +287,11 @@ void parse_command_line(int argc, char **argv, List selected_steps) opterr = 1; /* Let getopt report problems to the user */ while (1) { /* now cycle through the command line */ - c = getopt_long(argc, argv, "C:F:hj:Vv", + c = getopt_long(argc, argv, "F:hj:Vv", long_options, &optionIndex); if (c == -1) break; switch (c) { - case 'C': - params.opt_cluster = xstrdup(optarg); - 
break; case 'F': if(params.opt_field_list) xfree(params.opt_field_list); @@ -229,13 +315,10 @@ void parse_command_line(int argc, char **argv, List selected_steps) optarg); exit(1); } - params.opt_job_list = - xrealloc(params.opt_job_list, - (params.opt_job_list==NULL? 0 : - strlen(params.opt_job_list)) + - strlen(optarg) + 1); - strcat(params.opt_job_list, optarg); - strcat(params.opt_job_list, ","); + if(!params.opt_job_list) + params.opt_job_list = list_create( + destroy_jobacct_selected_step); + _addto_job_list(params.opt_job_list, optarg); break; case 'v': /* Handle -vvv thusly... @@ -281,13 +364,10 @@ void parse_command_line(int argc, char **argv, List selected_steps) optarg); exit(1); } - params.opt_job_list = - xrealloc(params.opt_job_list, - (params.opt_job_list==NULL? 0 : - strlen(params.opt_job_list)) + - strlen(optarg) + 1); - strcat(params.opt_job_list, optarg); - strcat(params.opt_job_list, ","); + if(!params.opt_job_list) + params.opt_job_list = list_create( + destroy_jobacct_selected_step); + _addto_job_list(params.opt_job_list, optarg); } if(!params.opt_field_list) { @@ -300,17 +380,13 @@ void parse_command_line(int argc, char **argv, List selected_steps) if (params.opt_verbose) { fprintf(stderr, "Options selected:\n" - "\topt_cluster=%s\n" "\topt_field_list=%s\n" - "\topt_header=%d\n" + "\topt_noheader=%d\n" "\topt_help=%d\n" - "\topt_job_list=%s\n" "\topt_verbose=%d\n", - params.opt_cluster, params.opt_field_list, - params.opt_header, + params.opt_noheader, params.opt_help, - params.opt_job_list, params.opt_verbose); logopt.stderr_level += params.opt_verbose; log_alter(logopt, 0, NULL); @@ -318,46 +394,20 @@ void parse_command_line(int argc, char **argv, List selected_steps) } /* specific jobs requested? */ - if (params.opt_job_list) { - start = params.opt_job_list; - while ((end = strstr(start, ",")) && start) { - *end = 0; - while (isspace(*start)) - start++; /* discard whitespace */ - if(!(int)*start) - continue; - selected_step = - xmalloc(sizeof(jobacct_selected_step_t)); - list_append(selected_steps, selected_step); - - dot = strstr(start, "."); - if (dot == NULL) { - debug2("No jobstep requested"); - selected_step->step = NULL; - selected_step->stepid = (uint32_t)NO_VAL; - } else { - *dot++ = 0; - selected_step->step = xstrdup(dot); - selected_step->stepid = atoi(dot); - } - selected_step->job = xstrdup(start); - selected_step->jobid = atoi(start); - start = end + 1; - } - if (params.opt_verbose) { - fprintf(stderr, "Jobs requested:\n"); - itr = list_iterator_create(selected_steps); - while((selected_step = list_next(itr))) { - if(selected_step->step) - fprintf(stderr, "\t: %s.%s\n", - selected_step->job, - selected_step->step); - else - fprintf(stderr, "\t: %s\n", - selected_step->job); - } - list_iterator_destroy(itr); + if (params.opt_verbose && params.opt_job_list + && list_count(params.opt_job_list)) { + fprintf(stderr, "Jobs requested:\n"); + itr = list_iterator_create(params.opt_job_list); + while((selected_step = list_next(itr))) { + if(selected_step->stepid != NO_VAL) + fprintf(stderr, "\t: %d.%d\n", + selected_step->jobid, + selected_step->stepid); + else + fprintf(stderr, "\t: %d\n", + selected_step->jobid); } + list_iterator_destroy(itr); } start = params.opt_field_list; diff --git a/src/sstat/sstat.c b/src/sstat/sstat.c index 62f246aebd05fe318efdb5e0d991b25e4ecf41ae..33c19cc51624d57b5d06dc283d01da11a4cf3982 100644 --- a/src/sstat/sstat.c +++ b/src/sstat/sstat.c @@ -64,16 +64,6 @@ jobacct_step_rec_t step; int printfields[MAX_PRINTFIELDS], /* Indexed 
into fields[] */ nprintfields = 0; -void _destroy_steps(void *object) -{ - jobacct_selected_step_t *step = (jobacct_selected_step_t *)object; - if(step) { - xfree(step->job); - xfree(step->step); - xfree(step); - } -} - void _print_header(void) { int i,j; @@ -239,28 +229,28 @@ int _do_stat(uint32_t jobid, uint32_t stepid) int main(int argc, char **argv) { ListIterator itr = NULL; - uint32_t jobid = 0; uint32_t stepid = 0; jobacct_selected_step_t *selected_step = NULL; - List selected_steps = list_create(_destroy_steps); - - parse_command_line(argc, argv, selected_steps); + parse_command_line(argc, argv); + if(!params.opt_job_list || !list_count(params.opt_job_list)) { + error("You didn't give me any jobs to stat."); + return 1; + } - if (params.opt_header) /* give them something to look */ + if (!params.opt_noheader) /* give them something to look */ _print_header();/* at while we think... */ - itr = list_iterator_create(selected_steps); + itr = list_iterator_create(params.opt_job_list); while((selected_step = list_next(itr))) { - jobid = atoi(selected_step->job); - if(selected_step->step) - stepid = atoi(selected_step->step); + if(selected_step->stepid != NO_VAL) + stepid = selected_step->stepid; else stepid = 0; - _do_stat(jobid, stepid); + _do_stat(selected_step->jobid, stepid); } list_iterator_destroy(itr); - list_destroy(selected_steps); + list_destroy(params.opt_job_list); return 0; } diff --git a/src/sstat/sstat.h b/src/sstat/sstat.h index 94b971980d51475d55cfd07aa8ad947d2d66c50a..23e15f9fd5bd349d061513b06e9116ce71d969e2 100644 --- a/src/sstat/sstat.h +++ b/src/sstat/sstat.h @@ -114,6 +114,6 @@ void print_vsize(type_t type, void *object); /* options.c */ -void parse_command_line(int argc, char **argv, List selected_steps); +void parse_command_line(int argc, char **argv); #endif /* !_SACCT_H */ diff --git a/src/strigger/Makefile.am b/src/strigger/Makefile.am index ca5cb43b4da6a109ed2fef350f35d9cad9b39cf9..0f8cebf34da917b99614e44aa9565347c39fdb24 100644 --- a/src/strigger/Makefile.am +++ b/src/strigger/Makefile.am @@ -6,7 +6,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = strigger -strigger_LDADD = $(top_builddir)/src/api/libslurmhelper.la +strigger_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = strigger.h strigger_SOURCES = strigger.c opts.c diff --git a/src/strigger/Makefile.in b/src/strigger/Makefile.in index 1755ca5fadc86a3c545e847222037e2e5fc147ab..1e03e8571ad4d6175bbd8d0ef09ab2256c608ac9 100644 --- a/src/strigger/Makefile.in +++ b/src/strigger/Makefile.in @@ -75,7 +75,7 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_strigger_OBJECTS = strigger.$(OBJEXT) opts.$(OBJEXT) strigger_OBJECTS = $(am_strigger_OBJECTS) -strigger_DEPENDENCIES = $(top_builddir)/src/api/libslurmhelper.la +strigger_DEPENDENCIES = $(top_builddir)/src/api/libslurm.o strigger_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(strigger_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -266,7 +266,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) -strigger_LDADD = $(top_builddir)/src/api/libslurmhelper.la +strigger_LDADD = $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = strigger.h strigger_SOURCES = strigger.c opts.c strigger_LDFLAGS = -export-dynamic $(CMD_LDFLAGS) diff --git a/src/sview/Makefile.am b/src/sview/Makefile.am index 
bf2e360c942659dbcce70afc010bd8bb736fa318..ed216cbb742b5eb0511006e426bbfddced75db29 100644 --- a/src/sview/Makefile.am +++ b/src/sview/Makefile.am @@ -11,8 +11,7 @@ bin_PROGRAMS = sview sview_LDADD = \ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ - $(top_builddir)/src/api/libslurmhelper.la - + $(top_builddir)/src/api/libslurm.o -ldl noinst_HEADERS = sview.h sview_SOURCES = sview.c popups.c grid.c part_info.c job_info.c \ diff --git a/src/sview/Makefile.in b/src/sview/Makefile.in index c46c9bfb4a56128155e0127afc29fd5431979d51..5ddd6b9abc4a4d39c23ac38fa86830e6263a0941 100644 --- a/src/sview/Makefile.in +++ b/src/sview/Makefile.in @@ -91,7 +91,7 @@ am__EXTRA_sview_SOURCES_DIST = sview.h sview.c popups.c grid.c \ admin_info.c common.c sview_OBJECTS = $(am_sview_OBJECTS) @HAVE_GTK_TRUE@sview_DEPENDENCIES = $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurm.o sview_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(CCLD) $(sview_CFLAGS) $(CFLAGS) $(sview_LDFLAGS) \ $(LDFLAGS) -o $@ @@ -286,7 +286,7 @@ AUTOMAKE_OPTIONS = foreign INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) @HAVE_GTK_TRUE@sview_LDADD = \ @HAVE_GTK_TRUE@ $(top_builddir)/src/plugins/select/bluegene/block_allocator/libbluegene_block_allocator.la \ -@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurmhelper.la +@HAVE_GTK_TRUE@ $(top_builddir)/src/api/libslurm.o -ldl @HAVE_GTK_TRUE@noinst_HEADERS = sview.h @HAVE_GTK_TRUE@sview_SOURCES = sview.c popups.c grid.c part_info.c job_info.c \ diff --git a/src/sview/popups.c b/src/sview/popups.c index b23f5319264a416bb517b8a3c2f1aefe402ed99b..6415cc5c0b40beb143af713d34c18e803ef8f6bb 100644 --- a/src/sview/popups.c +++ b/src/sview/popups.c @@ -214,7 +214,7 @@ static GtkTreeStore *_local_create_treestore_2cols(GtkWidget *popup, static void _layout_ctl_conf(GtkTreeStore *treestore, slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr) { - char temp_str[32]; + char temp_str[32], temp_str2[128]; int update = 0; GtkTreeIter iter; @@ -379,8 +379,9 @@ static void _layout_ctl_conf(GtkTreeStore *treestore, add_display_treestore_line(update, treestore, &iter, "PlugStackConfig", slurm_ctl_conf_ptr->plugstack); - snprintf(temp_str, sizeof(temp_str), "%u", - slurm_ctl_conf_ptr->private_data); + private_data_string(slurm_ctl_conf_ptr->private_data, + temp_str2, sizeof(temp_str2)); + snprintf(temp_str, sizeof(temp_str), "%s", temp_str2); add_display_treestore_line(update, treestore, &iter, "PrivateData", temp_str); diff --git a/src/sview/sview.h b/src/sview/sview.h index 9d70257edcf1e695929dd8b162a3606e845b1ed7..d55d5f26fe11c82a42641403f404eda3a7d57e80 100644 --- a/src/sview/sview.h +++ b/src/sview/sview.h @@ -70,6 +70,7 @@ #include "src/common/macros.h" #include "src/plugins/select/bluegene/block_allocator/block_allocator.h" #include "src/common/slurm_protocol_api.h" +#include "src/common/slurm_protocol_defs.h" #include "src/plugins/select/bluegene/wrap_rm_api.h" diff --git a/testsuite/expect/README b/testsuite/expect/README index 102284a02063900392035e84a018a3c71588e04b..cfb33bea5b7dc1887de3c10c3337f967d2cc5d4c 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ -499,5 +499,8 @@ test21.1 sacctmgr --usage test21.2 sacctmgr --help test21.3 sacctmgr -V test21.4 sacctmgr version -test21.5 sacctmgr add, list, and delete a cluster -test21.6 sacctmgr add, list, and delete multiple cluster 
+test21.5 sacctmgr add a cluster +test21.6 sacctmgr add multiple clusters +test21.7 sacctmgr list clusters +test21.8 sacctmgr modify a cluster +test21.9 sacctmgr modify multiple clusters diff --git a/testsuite/expect/globals b/testsuite/expect/globals index cdb60c4ae9a40858d60a09eef5e07c670729f813..05204c455561804780b159718c98c592b6d9ea31 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -978,3 +978,69 @@ proc is_super_user { } { log_user 1 return $found_user } + +################################################################ +# +# +# +################################################################ +proc check_acct_associations { } { + global sacctmgr number + + set rc 1 + set min -1 + set max -1 + log_user 0 + send_user "Testing Associations\n" + # + # Use sacctmgr to check associations + # + set s_pid [spawn $sacctmgr -n -p list assoc wopi wopl withd format=lft,rgt] + expect { + -re "($number)\\|($number)\\|" { + # Here we are checking if we have duplicates and + # setting up an array to check for holes later + + set num1 $expect_out(1,string) + set num2 $expect_out(2,string) + set first [info exists found($num1)] + set sec [info exists found($num2)] + #send_user "$first=$num1 $sec=$num2\n"; + if { $first } { + send_user "FAILURE: found lft $num1 again\n" + set rc 0 + } elseif { $sec } { + send_user "FAILURE: found rgt $num2 again\n" + set rc 0 + } else { + set found($num1) 1 + set found($num2) 1 + if { $min == -1 || $min > $num1 } { + set min $num1 + } + if { $max == -1 || $max < $num2 } { + set max $num2 + } + } + exp_continue + } + timeout { + send_user "FAILURE: sacctmgr add not responding\n" + slow_kill $s_pid + set exit_code 1 + } + eof { + wait + } + } + + # Here we are checking for holes in the list from above + for {set inx $min} {$inx < $max} {incr inx} { + if { ![info exists found($inx)] } { + send_user "FAILURE: No index at $inx\n" + set rc 0 + } + } + log_user 1 + return $rc +} diff --git a/testsuite/expect/test1.23 b/testsuite/expect/test1.23 index ab47df6699f5e4a9ebed3bf66321758dc9863a95..2eb71e58cdb768065c5a98b364aad4b203e23991 100755 --- a/testsuite/expect/test1.23 +++ b/testsuite/expect/test1.23 @@ -108,7 +108,7 @@ set host_0 "" set timeout $max_job_delay set srun_pid [spawn $srun -N1 -l --mem=999999 -t1 $bin_hostname] expect { - -re "configuration is not available" { + -re "not available" { send_user "This error is expected, no worries\n" set err_msg 1 exp_continue diff --git a/testsuite/expect/test1.35 b/testsuite/expect/test1.35 index 298c223fa3bca723d69ab33718f5dbba698822e0..d20e2c83602361b3fc44b46052a58922b4af02cb 100755 --- a/testsuite/expect/test1.35 +++ b/testsuite/expect/test1.35 @@ -121,9 +121,25 @@ if {[wait_for_file $file_out] == 0} { } } if {$step_cnt != $steps_started} { - send_user "\nFAILURE: not all steps reported by squeue\n" - send_user " Check $file_err for errors\n" - set exit_code 1 + set mem_err 0 + if {[wait_for_file $file_err] == 0} { + spawn $bin_cat $file_err + expect { + -re "memory limit" { + set mem_err 1 + } + eof { + wait + } + } + } + if {$mem_err == 1} { + send_user "\nWARNING: Test could not complete due to memory limit\n" + set step_cnt $steps_started + } else { + send_user "\nFAILURE: not all steps reported by squeue\n" + set exit_code 1 + } } if {$exit_code == 0} { diff --git a/testsuite/expect/test1.38 b/testsuite/expect/test1.38 index 074f9a67a25b235a0ceba8532c1a926500308eb8..fd14cf077a405041fafc6fa4c8c651ab057214c8 100755 --- a/testsuite/expect/test1.38 +++ b/testsuite/expect/test1.38 @@ -8,7 +8,8 @@ # 
"FAILURE: ..." otherwise with an explanation of the failure, OR # anything else indicates a failure mode that must be investigated. ############################################################################ -# Copyright (C) 2002-2006 The Regents of the University of California. +# Copyright (C) 2002-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. @@ -76,6 +77,11 @@ set matches 0 set job_id 0 set srun_pid [spawn $srun -v -N1 -t1 --unbuffered $file_in] expect { + -re "srun: task0: running" { + incr matches + cancel_job $job_id + exp_continue + } -re "launching ($number).0" { set job_id $expect_out(1,string) exp_continue @@ -85,15 +91,10 @@ expect { exec $bin_kill -INT $srun_pid exp_continue } - -re "srun: interrupt" { + -re "Force Terminated" { incr matches exp_continue } - -re "srun: task0: running" { - incr matches - cancel_job $job_id - exp_continue - } timeout { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid diff --git a/testsuite/expect/test1.49 b/testsuite/expect/test1.49 index fdbc7cfef486c8914612cf695beab10cbffa9179..5c8f43fef92161464f210fefb331e3690b0696c2 100755 --- a/testsuite/expect/test1.49 +++ b/testsuite/expect/test1.49 @@ -74,7 +74,7 @@ make_bash_script $file_in " # causes the test to fail # set matches 0 -set timeout $max_job_delay +set timeout [expr $max_job_delay + 30] if { [test_bluegene] } { set node_cnt 1-1024 } else { diff --git a/testsuite/expect/test1.7 b/testsuite/expect/test1.7 index 52b37442b87a11b68fcd21ea3dfe91d67464bf81..42efc6c938bea2ac87c66abf633fbdb0c160d902 100755 --- a/testsuite/expect/test1.7 +++ b/testsuite/expect/test1.7 @@ -95,7 +95,11 @@ set timeout [expr $max_job_delay + $sleep_time] set timed_out 0 set srun_pid [spawn $srun -t1 $bin_sleep $sleep_time] expect { - -re "time limit exceeded" { + -re "time limit" { + set timed_out 1 + exp_continue + } + -re "TIME LIMIT" { set timed_out 1 exp_continue } @@ -126,7 +130,11 @@ if {$timed_out == 1} { set completions 0 set srun_pid [spawn $srun -t4 $bin_sleep $sleep_time] expect { - -re "time limit exceeded" { + -re "time limit" { + set completions -1 + exp_continue + } + -re "TIME LIMIT" { set completions -1 exp_continue } diff --git a/testsuite/expect/test1.87 b/testsuite/expect/test1.87 index 18a53d4537cc3f45e745d76bac6cf5913d72bfe4..b2ce6cca52d964b988029c2bb8d4790b95279f19 100755 --- a/testsuite/expect/test1.87 +++ b/testsuite/expect/test1.87 @@ -37,7 +37,7 @@ source ./globals set test_id "1.87" set exit_code 0 set file_in "test$test_id.input" -set prompt "SLURM_QA_PROMPT: " +set prompt "QA_PROMPT: " print_header $test_id diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91 index f0a2669ffebfeea0753576330bdec4f3f542b8bd..256b0d4ba56fa9971631d524553dd44deac8ebdc 100755 --- a/testsuite/expect/test1.91 +++ b/testsuite/expect/test1.91 @@ -9,6 +9,7 @@ # anything else indicates a failure mode that must be investigated. ############################################################################ # Copyright (C) 2005-2007 The Regents of the University of California. +# Copyright (C) 2008 Lawrence Livermore National Security. # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). # Written by Morris Jette <jette1@llnl.gov> # LLNL-CODE-402394. 
@@ -58,23 +59,39 @@ if {$affinity == 0} { send_user "\nWARNING: task affinity not supported on this system\n" exit 0 } + send_user "\ntask affinity plugin installed\n" -set num_sockets 0 -set num_cores 0 -set num_threads 0 +# Identify a usable node +set timeout $max_job_delay set node_name "" -log_user 0 - -# Here we need to get the last node since if we run this on heterogeneous -# systems the count can change - -spawn $scontrol show node +set srun_pid [spawn $srun -N1 --exclusive --verbose $bin_hostname] expect { - -re "NodeName=($alpha_numeric)" { + -re "on host ($alpha_numeric_under)" { set node_name $expect_out(1,string) exp_continue } + timeout { + send_user "\nFAILURE: srun not responding\n" + slow_kill $srun_pid + set exit_code 1 + } + eof { + wait + } +} +if {[string compare $node_name ""] == 0} { + send_user "\nFAILURE: failed to get a usable node name\n" + exit 1 +} + +# Determine how many sockets, cores, and threads the node has +set num_sockets 0 +set num_cores 0 +set num_threads 0 +log_user 0 +spawn $scontrol show node $node_name +expect { -re "Sockets=($number)" { set num_sockets $expect_out(1,string) exp_continue diff --git a/testsuite/expect/test15.7 b/testsuite/expect/test15.7 index ee9a6e126d01f5f8b8fe40d6b8ed3b32b4e930be..f928c5ba58f829dfe32cdd49cded983d14074f77 100755 --- a/testsuite/expect/test15.7 +++ b/testsuite/expect/test15.7 @@ -101,7 +101,7 @@ expect { } exp_continue } - -re "MinMemory=($number)" { + -re "MinMemoryNode=($number)" { set read_mem $expect_out(1,string) if {$read_mem == $mem_size} { incr matches diff --git a/testsuite/expect/test17.10 b/testsuite/expect/test17.10 index e50a538e22ab4b5b5a9d031ef56d4fc8562a1ccc..d2b11f3a26fff6991a28361beff0d9cdc9f2f539 100755 --- a/testsuite/expect/test17.10 +++ b/testsuite/expect/test17.10 @@ -104,7 +104,7 @@ expect { } exp_continue } - -re "MinMemory=($number)" { + -re "MinMemoryNode=($number)" { set read_mem $expect_out(1,string) if {$read_mem == $mem_size} { incr matches diff --git a/testsuite/expect/test19.3 b/testsuite/expect/test19.3 index 41b7007182f1f18a4477d446e2a48dcd5a95eb24..eb5c7f2a65ffc76113b397cf250e2a14680de1f2 100755 --- a/testsuite/expect/test19.3 +++ b/testsuite/expect/test19.3 @@ -41,7 +41,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -52,9 +52,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.4 b/testsuite/expect/test19.4 index 789e4a9aa67fe1065163da30ff704b9c2c6f55d8..cfe61d7adce5f55e60b8da354556f334b6648718 100755 --- a/testsuite/expect/test19.4 +++ b/testsuite/expect/test19.4 @@ -41,7 +41,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -52,9 +52,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.5 b/testsuite/expect/test19.5 index 609541be0455bde5feec982a927bf8de31a21472..39f40b68f43d8fa644089dba1e63c18b0d66e86e 100755 --- a/testsuite/expect/test19.5 +++ b/testsuite/expect/test19.5 @@ -44,7 +44,7 @@ print_header $test_id # # get my uid and clear any 
vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -55,9 +55,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.6 b/testsuite/expect/test19.6 index b2d17013b481c7be08d2be37dfd89f2e1bb1b18a..9e871ca97ddaa1d5fd8bd4dbcd56bccb0bebf089 100755 --- a/testsuite/expect/test19.6 +++ b/testsuite/expect/test19.6 @@ -45,7 +45,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -56,9 +56,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test19.7 b/testsuite/expect/test19.7 index 8dd3ddee3c626b79e870cd58ecc8a7797eca70a6..d4f809814954bcc84e89702186be52796e940cd3 100755 --- a/testsuite/expect/test19.7 +++ b/testsuite/expect/test19.7 @@ -42,7 +42,7 @@ print_header $test_id # # get my uid and clear any vestigial triggers # -set uid 0 +set uid -1 spawn $bin_id -u expect { -re "($number)" { @@ -53,9 +53,12 @@ expect { wait } } -if {$uid == 0} { +if {$uid == -1} { send_user "\nCan't get my uid\n" exit 1 +} elseif {$uid == 0} { + send_user "\nWARNING: Can't run this test as user root\n" + exit 0 } exec $strigger --clear --quiet --user=$uid diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5 index cee55d8f806a5b66def55d021131cde74a396685..60f8c6d93abc1fa7ecbb350883a9797361cb93dc 100755 --- a/testsuite/expect/test21.5 +++ b/testsuite/expect/test21.5 @@ -1,7 +1,7 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# sacctmgr add, list, and delete a cluster +# sacctmgr add a cluster # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -33,16 +33,28 @@ source ./globals set test_id "21.5" set exit_code 0 -set amatches 0 -set lmatches 0 -set dmatches 0 +set amatches 0 +set lmatches 0 +set dmatches 0 set not_support 0 set add add set lis list set del delete set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall set clu cluster -set tc1 tCluster1 +set tc1 tcluster1 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 print_header $test_id @@ -50,57 +62,43 @@ print_header $test_id # # Use sacctmgr to create a cluster # -set sadd_pid [spawn $sacctmgr $add $clu $tc1] +set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { -re "Adding Cluster" { incr amatches exp_continue } - -re "Name *= $tc1" { + -re "$nam *= $tc1" { incr amatches exp_continue } - -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + -re "User Defaults" { incr amatches exp_continue } - -re "\\\(N\\\/y\\\):" { + -re "$fs *= $fs1" { incr amatches - send "Y\r" exp_continue } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + -re "$mc *= $mc1" { + incr amatches + exp_continue } - eof { - wait + -re "$mj *= $mj1" { + incr amatches + exp_continue } -} - -if {$amatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" - set exit_code 1 -} - -# -# Use sacctmgr to list the addition of cluster -# -set slist_pid [spawn $sacctmgr $lis $clu $tc1] -expect { - -re "Name" { - incr lmatches + -re "$mn *= $mn1" { + incr amatches exp_continue } - -re "$tc1" { - incr lmatches + -re "$mw *= $mw1" { + incr amatches exp_continue - send_user "\nFound $tc1 in database\n" } timeout { - send_user "\nFAILURE: sacctmgr list not responding\n" - slow_kill $slist_pid + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid set exit_code 1 } eof { @@ -108,15 +106,19 @@ expect { } } -if {$lmatches != 2} { - send_user "\nFAILURE: sacctmgr had a problem listing clusters\n" +if {$amatches != 8} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $amatches\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" set exit_code 1 } - # # Use sacctmgr to delete the test cluster # -set sadel_pid [spawn $sacctmgr $del $clu $tc1] +set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] expect { -re "Deleting clusters" { incr dmatches @@ -126,15 +128,6 @@ expect { incr dmatches exp_continue } - -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { - incr dmatches - exp_continue - } - -re "\\\(N\\\/y\\\):" { - incr dmatches - send "Y\r" - exp_continue - } timeout { send_user "\nFAILURE: sacctmgr delete not responding\n" slow_kill $sadel_pid @@ -145,8 +138,13 @@ expect { } } -if {$dmatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" +if {$dmatches != 2} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dmatches\n" + set exit_code 1 +} + +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" set exit_code 1 } diff --git a/testsuite/expect/test21.6 b/testsuite/expect/test21.6 index 8cf05b9e7ae03bdc1ebc6bfca9b1923e074507fb..7ebf96df6e5f20a953ddc1b9233ba0aee1886158 100755 --- a/testsuite/expect/test21.6 +++ b/testsuite/expect/test21.6 @@ -1,7 +1,7 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# sacctmgr add, list, and delete multiple clusters +# sacctmgr add multiple clusters # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -33,18 +33,30 @@ source ./globals set test_id "21.6" set exit_code 0 -set amatches 0 -set lmatches 0 -set dmatches 0 +set amatches 0 +set lmatches 0 +set dmatches 0 set not_support 0 set add add set lis list set del delete set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall set clu cluster -set tc1 tCluster1 -set tc2 tCluster2 -set tc3 tCluster3 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 print_header $test_id @@ -52,67 +64,60 @@ print_header $test_id # # Use sacctmgr to create a cluster # -set sadd_pid [spawn $sacctmgr $add $clu $tc1,$tc2,$tc3] +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] expect { -re "Adding Cluster" { incr amatches exp_continue } - -re "Name *= $tc1" { + -re "$nam *= $tc1" { incr amatches exp_continue } - -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + -re "$nam *= $tc2" { incr amatches exp_continue } - -re "\\\(N\\\/y\\\):" { + -re "$nam *= $tc3" { incr amatches - send "Y\r" exp_continue } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + -re "User Defaults" { + incr amatches + exp_continue } - eof { - wait + -re "$fs *= $fs1" { + incr amatches + exp_continue } -} - -if {$amatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" - set exit_code 1 -} - -# -# Use sacctmgr to list the addition of cluster -# -set slist_pid [spawn $sacctmgr $lis $clu $tc1,$tc2,$tc3] -expect { - -re "Name" { - incr lmatches + -re "$mc *= $mc1" { + incr amatches exp_continue } - -re "$tc1" { - incr lmatches + -re "$mj *= $mj1" { + incr amatches exp_continue - send_user "\nFound $tc1 in database\n" } - -re "$tc2" { - incr lmatches + -re "$mn *= $mn1" { + incr amatches exp_continue - send_user "\nFound $tc1 in database\n" } - -re "$tc3" { - incr lmatches + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" exp_continue - send_user "\nFound $tc1 in database\n" } timeout { - send_user "\nFAILURE: sacctmgr list not responding\n" - slow_kill $slist_pid + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid set exit_code 1 } eof { @@ -120,8 +125,12 @@ expect { } } -if {$lmatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem listing clusters\n" +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" set exit_code 1 } @@ -161,6 +170,10 @@ if {$dmatches != 4} { send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" set exit_code 1 } +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} if {$exit_code == 0} { send_user "\nSUCCESS\n" diff --git a/testsuite/expect/test21.7 b/testsuite/expect/test21.7 new file mode 100755 index 0000000000000000000000000000000000000000..622967c5127f9e9d0807117ab542dd398eebd8ec --- /dev/null +++ b/testsuite/expect/test21.7 @@ -0,0 +1,225 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr list clusters +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "21.7" +set exit_code 0 +set amatches 0 +set lmatches 0 +set dmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set mc1 1000000 +set mj1 50 +set mn1 300 +set mw1 01:00:00 + + +print_header $test_id + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "Adding Cluster" { + incr amatches + exp_continue + } + -re "$nam *= $tc1" { + incr amatches + exp_continue + } + -re "$nam *= $tc2" { + incr amatches + exp_continue + } + -re "$nam *= $tc3" { + incr amatches + exp_continue + } + -re "User Defaults" { + incr amatches + exp_continue + } + -re "$fs *= $fs1" { + incr amatches + exp_continue + } + -re "$mc *= $mc1" { + incr amatches + exp_continue + } + -re "$mj *= $mj1" { + incr amatches + exp_continue + } + -re "$mn *= $mn1" { + incr amatches + exp_continue + } + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the addition of cluster +# +set slist_pid [spawn $sacctmgr $lis $clu] +expect { + -re "Cluster" { + incr lmatches + exp_continue + } + -re "$tc1" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + -re "$tc2" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + -re "$tc3" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list not responding\n" + slow_kill $slist_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dmatches + exp_continue + } + -re "$tc1" { + incr dmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr dmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr dmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dmatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test21.8 b/testsuite/expect/test21.8 new file mode 100755 index 0000000000000000000000000000000000000000..170044417825298a9308004ae202898c5518a886 --- /dev/null +++ b/testsuite/expect/test21.8 @@ -0,0 +1,292 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify cluster +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals + +set test_id "21.8" +set exit_code 0 +set amatches 0 +set mmatches 0 +set lmatches 0 +set dmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1375 +set mc1 1000000 +set mc2 200000 +set mj1 50 +set mj2 9 +set mn1 300 +set mn2 125 +set mw1 01:00:00 +set mw2 12:00:00 + + +print_header $test_id + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "Adding Cluster" { + incr amatches + exp_continue + } + -re "$nam *= $tc1" { + incr amatches + exp_continue + } + -re "$nam *= $tc2" { + incr amatches + exp_continue + } + -re "$nam *= $tc3" { + incr amatches + exp_continue + } + -re "User Defaults" { + incr amatches + exp_continue + } + -re "$fs *= $fs1" { + incr amatches + exp_continue + } + -re "$mc *= $mc1" { + incr amatches + exp_continue + } + -re "$mj *= $mj1" { + incr amatches + exp_continue + } + -re "$mn *= $mn1" { + incr amatches + exp_continue + } + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to modify one cluster +# +set smod_pid [spawn $sacctmgr $mod $clu set $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 where $nams=$tc2] +expect { + -re "Setting" { + incr mmatches + exp_continue + } + -re "User Defaults" { + incr mmatches + exp_continue + } + -re "$fs *= $fs2" { + incr mmatches + exp_continue + } + -re "$mc *= $mc2" { + incr mmatches + exp_continue + } + -re "$mj *= $mj2" { + incr mmatches + exp_continue + } + -re "$mn *= $mn2" { + incr mmatches + exp_continue + } + -re "$mw *= $mw2" { + incr mmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr mmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr mmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr modify not responding\n" + slow_kill $smod_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$mmatches != 9} { + send_user "\nFAILURE: sacctmgr had a problem modifying clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the addition of cluster +# +set slist_pid [spawn $sacctmgr $lis $clu] +expect { + -re "Cluster" { + incr lmatches + exp_continue + } + -re "$tc1" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + -re "$tc2" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + -re "$tc3" { + incr lmatches + exp_continue + } + -re "$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { + incr lmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list not responding\n" + slow_kill $slist_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dmatches + exp_continue + } + -re "$tc1" { + incr dmatches + exp_continue + } + -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + incr dmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr dmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dmatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test21.9 b/testsuite/expect/test21.9 new file mode 100755 index 0000000000000000000000000000000000000000..12c0dd9113f4b6ea3afeb342acf7a763abc48d4f --- /dev/null +++ b/testsuite/expect/test21.9 @@ -0,0 +1,292 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr modify multiple clusters +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# LLNL-CODE-402394. +# +# This file is part of SLURM, a resource management program. +# For details, see <http://www.llnl.gov/linux/slurm/>. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ +source ./globals + +set test_id "21.9" +set exit_code 0 +set amatches 0 +set mmatches 0 +set lmatches 0 +set dmatches 0 +set not_support 0 +set add add +set lis list +set del delete +set mod modify +set nams Names +set nam Name +set fs Fairshare +set mc MaxCPUSecs +set mj MaxJobs +set mn MaxNodes +set mw MaxWall +set clu cluster +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1375 +set mc1 1000000 +set mc2 200000 +set mj1 50 +set mj2 9 +set mn1 300 +set mn2 125 +set mw1 01:00:00 +set mw2 12:00:00 + + +print_header $test_id + +# +# Use sacctmgr to create a cluster +# +set sadd_pid [spawn $sacctmgr $add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] +expect { + -re "Adding Cluster" { + incr amatches + exp_continue + } + -re "$nam *= $tc1" { + incr amatches + exp_continue + } + -re "$nam *= $tc2" { + incr amatches + exp_continue + } + -re "$nam *= $tc3" { + incr amatches + exp_continue + } + -re "User Defaults" { + incr amatches + exp_continue + } + -re "$fs *= $fs1" { + incr amatches + exp_continue + } + -re "$mc *= $mc1" { + incr amatches + exp_continue + } + -re "$mj *= $mj1" { + incr amatches + exp_continue + } + -re "$mn *= $mn1" { + incr amatches + exp_continue + } + -re "$mw *= $mw1" { + incr amatches + exp_continue + } + -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + incr amatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr amatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $sadd_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$amatches != 12} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to modify one cluster +# +set smod_pid [spawn $sacctmgr $mod $clu set $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 where $nams=$tc1,$tc2,$tc3] +expect { + -re "Setting" { + incr mmatches + exp_continue + } + -re "User Defaults" { + incr mmatches + exp_continue + } + -re "$fs *= $fs2" { + incr mmatches + exp_continue + } + -re "$mc *= $mc2" { + incr mmatches + exp_continue + } + -re "$mj *= $mj2" { + incr mmatches + exp_continue + } + -re "$mn *= $mn2" { + incr mmatches + exp_continue + } + -re "$mw *= $mw2" { + incr mmatches + exp_continue + } + -re "Would you like to commit changes\\\? 
\\\(You have 30 seconds to decide\\\)" { + incr mmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr mmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr modify not responding\n" + slow_kill $smod_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$mmatches != 9} { + send_user "\nFAILURE: sacctmgr had a problem modifying clusters\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +# +# Use sacctmgr to list the addition of cluster +# +set slist_pid [spawn $sacctmgr $lis $clu] +expect { + -re "Cluster" { + incr lmatches + exp_continue + } + -re "$tc1" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + -re "$tc2" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + -re "$tc3" { + incr lmatches + exp_continue + } + -re "$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { + incr lmatches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr list not responding\n" + slow_kill $slist_pid + set exit_code 1 + } + eof { + wait + } +} + +# +# Use sacctmgr to delete the test cluster +# +set sadel_pid [spawn $sacctmgr $del $clu $tc1,$tc2,$tc3] +expect { + -re "Deleting clusters" { + incr dmatches + exp_continue + } + -re "$tc1" { + incr dmatches + exp_continue + } + -re "Would you like to commit changes\\\? \\\(You have 30 seconds to decide\\\)" { + incr dmatches + exp_continue + } + -re "\\\(N\\\/y\\\):" { + incr dmatches + send "Y\r" + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $sadel_pid + set exit_code 1 + } + eof { + wait + } +} + +if {$dmatches != 4} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster\n" + set exit_code 1 +} +if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + set exit_code 1 +} + +if {$exit_code == 0} { + send_user "\nSUCCESS\n" +} + +exit $exit_code diff --git a/testsuite/expect/test5.6 b/testsuite/expect/test5.6 index b6b74ac737fe0183813cfe2c5247f913d61aa553..7d2879d346f25a1880ae895e8be97410b17acc2b 100755 --- a/testsuite/expect/test5.6 +++ b/testsuite/expect/test5.6 @@ -287,7 +287,7 @@ expect { wait } } -if {[string compare partition2 ""]} { +if {[string compare partition2 ""] == 0} { set partition2 $partition1 } spawn $squeue --format=%P --noheader --partitions=$partition1
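[Editorial note, not part of the patch] The final test5.6 hunk fixes an inverted condition: Tcl's string compare returns 0 when its arguments are equal, so the fallback to $partition1 should run only when the comparison result is 0. A minimal sketch of the corrected idiom follows; note that the patch still compares the literal word "partition2", so using the variable $partition2 here is our assumption about the intent.

	# Sketch only: string compare yields 0 on equality, so "== 0" reads as
	# "partition2 is empty"; the pre-patch test had this check backwards.
	set partition1 "debug"
	set partition2 ""
	if {[string compare $partition2 ""] == 0} {
		set partition2 $partition1
	}
	puts "partition2 is now: $partition2"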