From 39f6f501e802b73fe338966da51cdc0a7e94045d Mon Sep 17 00:00:00 2001
From: Mehdi Dogguy <mehdi@debian.org>
Date: Mon, 8 Sep 2014 21:31:46 +0200
Subject: [PATCH] Imported Upstream version 1.3.8

---
 META | 4 +-
 NEWS | 28 +-
 configure | 2 +-
 doc/html/faq.shtml | 17 +-
 doc/html/testimonials.shtml | 2 +-
 doc/man/man1/salloc.1 | 18 +-
 doc/man/man1/scontrol.1 | 7 +-
 doc/man/man1/srun.1 | 6 +-
 doc/man/man8/spank.8 | 42 +-
 slurm.spec | 6 +-
 slurm/slurm.h.in | 3 +
 slurm/spank.h | 13 +
 src/api/partition_info.c | 5 +-
 src/common/assoc_mgr.c | 17 +-
 src/common/env.c | 13 +-
 src/common/plugstack.c | 268 ++--
 src/common/read_config.c | 6 +
 src/common/slurm_accounting_storage.c | 63 +-
 src/common/slurm_accounting_storage.h | 22 +-
 src/common/slurm_protocol_api.c | 18 +
 src/common/slurm_protocol_api.h | 6 +
 src/common/slurm_protocol_defs.c | 24 +-
 .../slurm_protocol_socket_implementation.c | 15 +-
 .../filetxt/accounting_storage_filetxt.c | 20 +-
 .../gold/accounting_storage_gold.c | 96 +-
 .../mysql/accounting_storage_mysql.c | 523 +++++---
 .../mysql/mysql_jobacct_process.c | 84 +-
 .../mysql/mysql_jobacct_process.h | 2 +-
 .../none/accounting_storage_none.c | 21 +-
 .../pgsql/accounting_storage_pgsql.c | 20 +-
 .../slurmdbd/accounting_storage_slurmdbd.c | 20 +-
 src/plugins/sched/backfill/backfill.c | 12 +-
 src/plugins/sched/backfill/backfill.h | 3 +
 src/plugins/sched/backfill/backfill_wrapper.c | 13 -
 src/plugins/sched/wiki/job_modify.c | 9 +-
 src/plugins/sched/wiki2/job_modify.c | 8 +-
 .../block_allocator/block_allocator.c | 587 +++++----
 .../block_allocator/block_allocator.h | 308 +++--
 .../select/bluegene/plugin/bg_block_info.c | 19 +-
 .../select/bluegene/plugin/bg_job_place.c | 10 +-
 .../select/bluegene/plugin/bg_job_run.c | 166 ++-
 .../bluegene/plugin/bg_record_functions.c | 2 +-
 src/plugins/select/bluegene/plugin/bluegene.c | 16 +-
 .../select/bluegene/plugin/defined_block.c | 4 +-
 .../select/bluegene/plugin/select_bluegene.c | 8 +-
 .../select/bluegene/plugin/slurm_prolog.c | 5 +-
 src/plugins/select/cons_res/select_cons_res.c | 89 +-
 src/sacct/options.c | 61 +-
 src/sacctmgr/account_functions.c | 71 +-
 src/sacctmgr/association_functions.c | 3 +-
 src/sacctmgr/cluster_functions.c | 48 +-
 src/sacctmgr/common.c | 19 +-
 src/sacctmgr/file_functions.c | 149 ++-
 src/sacctmgr/qos_functions.c | 4 +-
 src/sacctmgr/sacctmgr.c | 17 +-
 src/sacctmgr/txn_functions.c | 2 +-
 src/sacctmgr/user_functions.c | 42 +-
 src/salloc/Makefile.am | 2 +-
 src/salloc/Makefile.in | 2 +-
 src/salloc/opt.c | 25 +-
 src/salloc/salloc.c | 115 ++
 src/sbatch/sbatch.c | 24 +-
 src/scontrol/scontrol.c | 14 +-
 src/sinfo/opts.c | 6 +-
 src/sinfo/sinfo.c | 3 +-
 src/slurmctld/controller.c | 13 +-
 src/slurmctld/job_mgr.c | 115 +-
 src/slurmctld/node_scheduler.c | 9 +-
 src/slurmctld/slurmctld.h | 12 +-
 src/slurmd/slurmstepd/task.c | 33 +-
 src/slurmdbd/proc_req.c | 24 +-
 src/slurmdbd/read_config.c | 38 +-
 src/slurmdbd/read_config.h | 1 +
 src/sreport/cluster_reports.c | 3 +-
 src/sreport/job_reports.c | 5 +-
 src/sreport/user_reports.c | 2 +-
 src/sstat/options.c | 17 +-
 src/sview/grid.c | 11 +-
 testsuite/expect/test21.15 | 803 +++++++---
 testsuite/expect/test21.16 | 842 +++++++----
 testsuite/expect/test21.17 | 973 +++++++++----
 testsuite/expect/test21.18 | 1032 ++++++++++----
 testsuite/expect/test21.19 | 1096 ++++++++++++---
 testsuite/expect/test21.5 | 67 +-
 testsuite/expect/test6.11 | 4 +-
 testsuite/expect/test7.11 | 6 +-
 testsuite/expect/test7.11.prog.c | 10 +
 testsuite/expect/test7.3 | 2 +-
 88 files changed, 6251 insertions(+), 2124 deletions(-)
diff --git a/META b/META
index 6202ef8e5..c8a6dd3fd 100644
--- a/META
+++ b/META
@@ -3,9 +3,9 @@
 Api_revision: 0
 Major: 1
 Meta: 1
-Micro: 7
+Micro: 8
 Minor: 3
 Name: slurm
 Release: 1
 Release_tags: dist
-Version: 1.3.7
+Version: 1.3.8
diff --git a/NEWS b/NEWS
index df2024005..2d5dcedbf 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,25 @@
 This file describes changes in recent versions of SLURM. It primarily
 documents those changes that are of interest to users and admins.
 
+* Changes in SLURM 1.3.8
+========================
+ -- Added PrivateData flags for Users, Usage, and Accounts to Accounting.
+    If using slurmdbd, set in the slurmdbd.conf file. Otherwise set in the
+    slurm.conf file. See "man slurm.conf" or "man slurmdbd.conf" for details.
+ -- Reduce frequency of resending job kill RPCs. Helpful in the event of
+    network problems or down nodes.
+ -- Fix memory leak caused under heavy load when running with select/cons_res
+    plus sched/backfill.
+ -- For salloc, if no local command is specified, execute the user's default
+    shell.
+ -- BLUEGENE - patch to make sure when starting a job blocks required to be
+    freed are checked to make sure no job is running on them. If one is found
+    we will requeue the new job. No job will be lost.
+ -- BLUEGENE - Set MPI environment variables from salloc.
+ -- BLUEGENE - Fix threading issue for overlap mode
+ -- Reject batch scripts containing DOS linebreaks.
+ -- BLUEGENE - Added wait for block boot to salloc
+
 * Changes in SLURM 1.3.7
 ========================
  -- Add jobid/stepid to MESSAGE_TASK_EXIT to address race condition when
@@ -450,6 +469,13 @@ documents those changes that are of interest to users and admins.
    Moved existing digital signature logic into new plugin: crypto/openssl.
    Added new support for crypto/munge (available with GPL license).
 
+* Changes in SLURM 1.2.35
+=========================
+ -- Permit SPANK plugins to dynamically register options at runtime based upon
+    configuration or other runtime checks.
+ -- Add "include" keyword to SPANK plugstack.conf file to optionally include
+    other configuration files or directories of configuration files.
+
 * Changes in SLURM 1.2.34
 =========================
  -- Permit the cancellation of a job that is in the process of being
@@ -3436,4 +3462,4 @@ documents those changes that are of interest to users and admins.
  -- Change directory to /tmp in slurmd if daemonizing.
  -- Logfiles are reopened on reconfigure.
 
-$Id: NEWS 14883 2008-08-25 21:36:27Z jette $
+$Id: NEWS 14961 2008-09-03 17:38:39Z da $
diff --git a/configure b/configure
index 00ef5f35a..c052058b6 100755
--- a/configure
+++ b/configure
@@ -25271,7 +25271,7 @@ echo "$as_me: WARNING: *** pkg-config not found. Cannot probe for libglade-2.0 o
 # fi
 
-### Check for gtk2.8 package
+### Check for gtk2.7.1 package
 if test "$ac_have_gtk" == "yes" ; then
    $HAVEPKGCONFIG --exists gtk+-2.0
    if ! test $? -eq 0 ; then
diff --git a/doc/html/faq.shtml b/doc/html/faq.shtml
index 9061ba998..b9711d0b1 100644
--- a/doc/html/faq.shtml
+++ b/doc/html/faq.shtml
@@ -85,6 +85,8 @@ SLURM RPM?</li>
 <li><a href="#slurmdbd">Why should I use the slurmdbd instead of the
 regular database plugins?</li>
 <li><a href="#debug">How can I build SLURM with debugging symbols?</li>
+<li><a href="#state_preserve">How can I easily preserve drained node information
+between major SLURM updates?</li>
 </ol>
 
 <h2>For Users</h2>
@@ -984,8 +986,21 @@ You want the "-g" option to produce debugging information and "-O0" to
 set the optimization level to zero (off).
 For example:<br>
 CFLAGS="-g -O0" ./configure ...
+<p><a name="state_preserve"><b>30. How can I easily preserve drained node
+information between major SLURM updates?</b><br>
+Major SLURM updates generally have changes in the state save files and
+communication protocols, so a cold-start (without state) is generally
+required. If you have nodes in a DRAIN state and want to preserve that
+information, you can easily build a script to do so
+using the <i>sinfo</i> command. The following command line will report the
+<i>Reason</i> field for every node in a DRAIN state and write the output
+in a form that can be executed later to restore state.
+<pre>
+sinfo -t drain -h -o "scontrol update nodename='%N' state=drain reason='%E'"
+</pre>
+
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 8 August 2008</p>
+<p style="text-align:center;">Last modified 2 September 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/html/testimonials.shtml b/doc/html/testimonials.shtml
index 0ee1daef5..ebc3c819f 100644
--- a/doc/html/testimonials.shtml
+++ b/doc/html/testimonials.shtml
@@ -112,6 +112,6 @@ Bill Celmaster, XC Program Manager, Hewlett-Packard Company
 </i>
 
 <HR SIZE=4>
-<p style="text-align:center;">Last modified 289 July 2008</p>
+<p style="text-align:center;">Last modified 28 July 2008</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1
index 356f916d0..58d7fc300 100644
--- a/doc/man/man1/salloc.1
+++ b/doc/man/man1/salloc.1
@@ -1,16 +1,24 @@
-.TH "salloc" "1" "SLURM 1.3" "July 2008" "SLURM Commands"
+.TH "salloc" "1" "SLURM 1.3" "August 2008" "SLURM Commands"
 
 .SH "NAME"
 .LP
-salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished.
+salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command,
+and then release the allocation when the command is finished.
+
 .SH "SYNOPSIS"
 .LP
 salloc [\fIoptions\fP] [<\fIcommand\fP> [\fIcommand args\fR]]
 
 .SH "DESCRIPTION"
 .LP
-salloc is used to allocate a SLURM job allocation, which is a set of resources (nodes), possibly with some set of constraints (e.g. number of processors per node). When salloc successfully obtains the requested allocation, it then runs the command specified by the user. Finally, when the user specified command is complete, salloc relinquishes the job allocation.
-
-The command may be any program the user wishes. Some typical commands are xterm, a shell script containing srun commands, and srun (see the EXAMPLES section).
+salloc is used to allocate a SLURM job allocation, which is a set of resources
+(nodes), possibly with some set of constraints (e.g. number of processors per
+node). When salloc successfully obtains the requested allocation, it then runs
+the command specified by the user. Finally, when the user specified command is
+complete, salloc relinquishes the job allocation.
+
+The command may be any program the user wishes. Some typical commands are
+xterm, a shell script containing srun commands, and srun (see the EXAMPLES
+section). If no command is specified, the user's default shell is executed.
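The default-shell fallback amounts to looking up the invoking user's password
entry. Below is a minimal illustrative sketch, not the actual salloc source:
it assumes only the POSIX getpwuid(3) and execl(3) interfaces, and the helper
name default_shell is hypothetical.

    #include <pwd.h>      /* getpwuid */
    #include <stdio.h>    /* perror */
    #include <unistd.h>   /* getuid, execl */

    /* Hypothetical helper: return the caller's login shell, or /bin/sh
     * if the password entry is missing or has an empty shell field. */
    static const char *default_shell(void)
    {
        struct passwd *pw = getpwuid(getuid());

        if (pw && pw->pw_shell && pw->pw_shell[0])
            return pw->pw_shell;
        return "/bin/sh";
    }

    int main(int argc, char **argv)
    {
        if (argc < 2) {             /* no command given: run the shell */
            const char *shell = default_shell();
            execl(shell, shell, (char *) NULL);
            perror("execl");        /* only reached if exec fails */
            return 1;
        }
        /* ... otherwise execute the command named in argv[1] ... */
        return 0;
    }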
.SH "OPTIONS" .LP diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index b0d91c25d..34343bd2e 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -1,4 +1,4 @@ -.TH SCONTROL "1" "December 2007" "scontrol 1.3" "Slurm components" +.TH SCONTROL "1" "August 2008" "scontrol 1.3" "Slurm components" .SH "NAME" scontrol \- Used view and modify Slurm configuration and state. @@ -43,8 +43,9 @@ Print information one line per record. Print no warning or informational messages, only fatal error messages. .TP \fB\-v\fR, \fB\-\-verbose\fR -Print detailed event logging. This includes time\-stamps on data structures, -record counts, etc. +Print detailed event logging. Multiple \fB\-v\fR's will further increase +the verbosity of logging. By default only errors will be displayed. + .TP \fB\-V\fR , \fB\-\-version\fR Print version information and exit. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 5aca020e0..49fa02da3 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,4 +1,4 @@ -.TH SRUN "1" "July 2008" "srun 1.3" "slurm components" +.TH SRUN "1" "August 2008" "srun 1.3" "slurm components" .SH "NAME" srun \- run parallel jobs @@ -901,8 +901,8 @@ Display version information and exit. .TP \fB\-v\fR, \fB\-\-verbose\fR -verbose operation. Multiple \fB\-v\fR's will further increase the verbosity of -\fBsrun\fR. By default only errors will be displayed. +Print detailed event logging. Multiple \fB\-v\fR's will further increase +the verbosity of logging. By default only errors will be displayed. .TP \fB\-W\fR, \fB\-\-wait\fR=\fIseconds\fR diff --git a/doc/man/man8/spank.8 b/doc/man/man8/spank.8 index 2ee7e5886..1599ec8cb 100644 --- a/doc/man/man8/spank.8 +++ b/doc/man/man8/spank.8 @@ -169,10 +169,8 @@ option is specified, its value is forwarded and registered with the plugin on the remote side. In this way, \fBSPANK\fR plugins may dynamically provide new options and functionality to SLURM. .LP -In order to register new options with SLURM, a plugin must -define symbol \fBspank_options\fR which is a pointer to the -first element of an array of \fBstruct spank_option\fR declared -in \fB<slurm/spank.h>\fR as +Each option registered by a plugin to SLURM takes the form of +a \fBstruct spank_option\fR which is declared in \fB<slurm/spank.h>\fR as .nf struct spank_option { @@ -185,7 +183,7 @@ in \fB<slurm/spank.h>\fR as }; .fi -.LP + Where .TP .I name @@ -221,10 +219,28 @@ struct, \fIoptarg\fR is the supplied argument if applicable, and \fIremote\fR is 0 if the function is being called from the "local" host (e.g. \fBsrun\fR or \fBsbatch\fR) or 1 from the "remote" host (\fBslurmd\fR). .LP -The last element of the array must filled with zeros. A +There are two methods by which the plugin can register these options +with SLURM. The simplest method is for the plugin to define an array +of \fBstruct spank_option\fR with the symbol name \fBspank_options\fR. +This final element in the options table must be filled with zeros. A \fBSPANK_OPTIONS_TABLE_END\fR macro is defined in \fB<slurm/spank.h>\fR for this purpose. .LP +Plugin options may also be dynamically registered with SLURM using +the \fBspank_option_register\fR function. This function is only valid +when called from the plugin's \fBslurm_spank_init\fR handler, and +registers one option at a time. 
The prototype is +.nf + + spank_err_t spank_option_register (spank_t sp, + struct spank_option *opt); + +.fi +This function will return \fBESPANK_SUCCESS\fR on successful registration +of an option, or \fBESPANK_BAD_ARG\fR for errors including invalid spank_t +handle, or when the function is not called from the \fBslurm_spank_init\fR +function. +.LP When an option is provided by the user on the local side, \fBSLURM\fR will immediately invoke the option's callback with \fIremote\fR=0. This is meant for the plugin to do local sanity checking of the option before @@ -276,6 +292,20 @@ be placed into the config file. The plugins will simply be called in order, one after the other, and appropriate action taken on failure given that state of the plugin's \fIoptional\fR flag. .LP +Additional config files or directories of config files may be included +in \fBplugstack.conf\fR with the \fBinclude\fR keyword. The \fBinclude\fR +keyword must appear on its own line, and takes a glob as its parameter, +so multiple files may be included from one \fBinclude\fR line. For +example, the following syntax will load all config files in the +/etc/slurm/plugstack.conf.d directory, in local collation order: +.nf + + include /etc/slurm/plugstack.conf.d/* + +.fi +which might be considered a more flexible method for building up +a spank plugin stack. +.LP The \fBSPANK\fR config file is re\-read on each job launch, so editing the config file will not affect running jobs. However care should be taken so that a partially edited config file is not read by a diff --git a/slurm.spec b/slurm.spec index 79c3e3a08..b452bc5ef 100644 --- a/slurm.spec +++ b/slurm.spec @@ -71,14 +71,14 @@ %endif Name: slurm -Version: 1.3.7 +Version: 1.3.8 Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.3.7.tar.bz2 +Source: slurm-1.3.8.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -256,7 +256,7 @@ SLURM process tracking plugin for SGI job containers. ############################################################################# %prep -%setup -n slurm-1.3.7 +%setup -n slurm-1.3.8 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index f9819f4e8..142db9666 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -454,6 +454,9 @@ typedef enum select_type_plugin_info { #define PRIVATE_DATA_JOBS 0x0001 /* job/step data is private */ #define PRIVATE_DATA_NODES 0x0002 /* node data is private */ #define PRIVATE_DATA_PARTITIONS 0x0004 /* partition data is private */ +#define PRIVATE_DATA_USAGE 0x0008 /* accounting usage data is private */ +#define PRIVATE_DATA_USERS 0x0010 /* accounting user data is private */ +#define PRIVATE_DATA_ACCOUNTS 0x0020 /* accounting account data is private */ /*****************************************************************************\ * PROTOCOL DATA STRUCTURE DEFINITIONS diff --git a/slurm/spank.h b/slurm/spank.h index 5ef0a5eb1..3d59ba174 100644 --- a/slurm/spank.h +++ b/slurm/spank.h @@ -183,6 +183,8 @@ struct spank_option { /* * Plugin may declare spank_options option table: + * [Note: options may also be declared with spank_option_register(), + * defined below.] */ extern struct spank_option spank_options []; @@ -222,6 +224,17 @@ int spank_symbol_supported (const char *symbol); */ int spank_remote (spank_t spank); +/* + * Register a plugin-provided option dynamically. 
This function + * is only valid when called from slurm_spank_init(), and must + * be called in both remote (slurmd) and local (srun) contexts. + * May be called multiple times to register many options. + * + * Returns ESPANK_SUCCESS on successful registration of the option + * or ESPANK_BAD_ARG if not called from slurm_spank_init(). + */ +spank_err_t spank_option_register (spank_t spank, struct spank_option *opt); + /* Get the value for the current job or task item specified, * storing the result in the subsequent pointer argument(s). diff --git a/src/api/partition_info.c b/src/api/partition_info.c index 416d43440..9bc41dd76 100644 --- a/src/api/partition_info.c +++ b/src/api/partition_info.c @@ -114,11 +114,12 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, #ifdef HAVE_BG convert_num_unit((float)part_ptr->total_nodes, tmp1, sizeof(tmp1), UNIT_NONE); + convert_num_unit((float)part_ptr->total_cpus, tmp2, sizeof(tmp2), + UNIT_NONE); #else snprintf(tmp1, sizeof(tmp1), "%u", part_ptr->total_nodes); + snprintf(tmp2, sizeof(tmp2), "%u", part_ptr->total_cpus); #endif - convert_num_unit((float)part_ptr->total_cpus, tmp2, sizeof(tmp2), - UNIT_NONE); snprintf(tmp_line, sizeof(tmp_line), "PartitionName=%s TotalNodes=%s TotalCPUs=%s ", part_ptr->name, tmp1, tmp2); diff --git a/src/common/assoc_mgr.c b/src/common/assoc_mgr.c index 7a7396a27..3a41b90a1 100644 --- a/src/common/assoc_mgr.c +++ b/src/common/assoc_mgr.c @@ -92,6 +92,8 @@ static int _get_local_association_list(void *db_conn, int enforce) { acct_association_cond_t assoc_q; char *cluster_name = NULL; + uid_t uid = getuid(); + // DEF_TIMERS; slurm_mutex_lock(&local_association_lock); if(local_association_list) @@ -113,7 +115,7 @@ static int _get_local_association_list(void *db_conn, int enforce) // START_TIMER; local_association_list = - acct_storage_g_get_associations(db_conn, &assoc_q); + acct_storage_g_get_associations(db_conn, uid, &assoc_q); // END_TIMER2("get_associations"); if(assoc_q.cluster_list) @@ -149,10 +151,12 @@ static int _get_local_association_list(void *db_conn, int enforce) static int _get_local_qos_list(void *db_conn, int enforce) { + uid_t uid = getuid(); + slurm_mutex_lock(&local_qos_lock); if(local_qos_list) list_destroy(local_qos_list); - local_qos_list = acct_storage_g_get_qos(db_conn, NULL); + local_qos_list = acct_storage_g_get_qos(db_conn, uid, NULL); if(!local_qos_list) { slurm_mutex_unlock(&local_qos_lock); @@ -172,14 +176,15 @@ static int _get_local_qos_list(void *db_conn, int enforce) static int _get_local_user_list(void *db_conn, int enforce) { acct_user_cond_t user_q; + uid_t uid = getuid(); memset(&user_q, 0, sizeof(acct_user_cond_t)); user_q.with_coords = 1; - + slurm_mutex_lock(&local_user_lock); if(local_user_list) list_destroy(local_user_list); - local_user_list = acct_storage_g_get_users(db_conn, &user_q); + local_user_list = acct_storage_g_get_users(db_conn, uid, &user_q); if(!local_user_list) { slurm_mutex_unlock(&local_user_lock); @@ -197,7 +202,7 @@ static int _get_local_user_list(void *db_conn, int enforce) while((user = list_next(itr))) { uid_t pw_uid = uid_from_string(user->name); if(pw_uid == (uid_t) -1) { - error("couldn't get a uid for user %s", + debug("couldn't get a uid for user %s", user->name); user->uid = (uint32_t)NO_VAL; } else @@ -704,7 +709,7 @@ extern int assoc_mgr_update_local_users(acct_update_object_t *update) } pw_uid = uid_from_string(object->name); if(pw_uid == (uid_t) -1) { - error("couldn't get a uid for user %s", + debug("couldn't get a uid for user %s", 
object->name); object->uid = NO_VAL; } else diff --git a/src/common/env.c b/src/common/env.c index 63f2056c0..008531e22 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -771,6 +771,7 @@ extern char *uint32_compressed_to_str(uint32_t array_len, * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE * LOADLBATCH (AIX only) + * MPIRUN_PARTITION, MPIRUN_NOFREE, and MPIRUN_NOALLOCATE (BGL only) * * Sets OBSOLETE variables: * SLURM_JOBID @@ -782,7 +783,7 @@ extern char *uint32_compressed_to_str(uint32_t array_len, void env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) { - char *tmp; + char *bgl_part_id = NULL, *tmp; env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", @@ -799,6 +800,16 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) env_array_overwrite(dest, "LOADLBATCH", "yes"); #endif + /* BlueGene only */ + select_g_get_jobinfo(alloc->select_jobinfo, SELECT_DATA_BLOCK_ID, + &bgl_part_id); + if (bgl_part_id) { + env_array_overwrite_fmt(dest, "MPIRUN_PARTITION", "%s", + bgl_part_id); + env_array_overwrite_fmt(dest, "MPIRUN_NOFREE", "%d", 1); + env_array_overwrite_fmt(dest, "MPIRUN_NOALLOCATE", "%d", 1); + } + /* obsolete */ env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", alloc->node_cnt); diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 62cb45ba3..9776df489 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -41,6 +41,8 @@ #include <string.h> #include <ctype.h> #include <stdlib.h> +#include <libgen.h> +#include <glob.h> #include "src/common/plugin.h" #include "src/common/xmalloc.h" @@ -60,6 +62,7 @@ #define REQUIRED "required" #define OPTIONAL "optional" +#define INCLUDE "include" struct spank_plugin_operations { spank_f *init; @@ -145,6 +148,7 @@ typedef enum step_fn { struct spank_handle { # define SPANK_MAGIC 0x00a5a500 int magic; /* Magic identifier to ensure validity. */ + struct spank_plugin *plugin; /* Current plugin using handle */ spank_handle_type_t type; /* remote(slurmd) || local(srun) */ step_fn_t phase; /* Which spank fn are we called from? */ void * job; /* Reference to current srun|slurmd job */ @@ -168,7 +172,6 @@ static const char * default_spank_path = NULL; */ static int _spank_plugin_options_cache(struct spank_plugin *p); - static void _argv_append(char ***argv, int ac, const char *newarg) { *argv = xrealloc(*argv, (++ac + 1) * sizeof(char *)); @@ -177,9 +180,32 @@ static void _argv_append(char ***argv, int ac, const char *newarg) return; } +typedef enum { + CF_ERROR = 0, + CF_OPTIONAL, + CF_REQUIRED, + CF_INCLUDE, +} cf_line_t; + +static cf_line_t _plugin_stack_line_type (const char *str) +{ + if (strcmp(str, REQUIRED) == 0) + return (CF_REQUIRED); + else if (strcmp(str, OPTIONAL) == 0) + return (CF_OPTIONAL); + else if (strcmp(str, INCLUDE) == 0) + return (CF_INCLUDE); + else { + error("spank: Invalid option \"%s\". 
Must be %s, %s or %s", + str, REQUIRED, OPTIONAL, INCLUDE); + return (CF_ERROR); + } +} + + static int _plugin_stack_parse_line(char *line, char **plugin, int *acp, char ***argv, - bool * required) + cf_line_t * type) { int ac; const char *separators = " \t\n"; @@ -205,19 +231,10 @@ _plugin_stack_parse_line(char *line, char **plugin, int *acp, char ***argv, *s = '\0'; if (!(option = strtok_r(line, separators, &sp))) - return 0; - - if (strncmp(option, REQUIRED, strlen(option)) == 0) { - *required = true; - } - else if (strncmp(option, OPTIONAL, strlen(option)) == 0) { - *required = false; - } - else { - error("spank: Invalid option \"%s\". Must be either %s or %s", - option, REQUIRED, OPTIONAL); + return (0); + + if (((*type) = _plugin_stack_line_type(option)) == CF_ERROR) return (-1); - } if (!(path = strtok_r(NULL, separators, &sp))) return (-1); @@ -325,24 +342,27 @@ _spank_plugin_find (const char *path, const char *file) return (NULL); } +static int _spank_conf_include (const char *, int, const char *, List *); + static int -_spank_stack_process_line(const char *file, int line, char *buf, - struct spank_plugin **plugin) +_spank_stack_process_line(const char *file, int line, char *buf, List *stackp) { char **argv; int ac; char *path; - bool required = FALSE; + cf_line_t type = CF_REQUIRED; + bool required; struct spank_plugin *p; - *plugin = NULL; - - if (_plugin_stack_parse_line(buf, &path, &ac, &argv, &required) < 0) { + if (_plugin_stack_parse_line(buf, &path, &ac, &argv, &type) < 0) { error("spank: %s:%d: Invalid line. Ignoring.", file, line); return (0); } + if (type == CF_INCLUDE) + return (_spank_conf_include (file, line, path, stackp)); + if (path == NULL) /* No plugin listed on this line */ return (0); @@ -355,21 +375,31 @@ _spank_stack_process_line(const char *file, int line, char *buf, } } + required = (type == CF_REQUIRED); if (!(p = _spank_plugin_create(path, ac, argv, required))) { if (required) - error ("spank: %s:%d: Failed to load plugin %s. Aborting.", - file, line, path); + error ("spank: %s:%d:" + " Failed to load plugin %s. Aborting.", + file, line, path); else - verbose ("spank: %s:%d: Failed to load optional plugin %s. Ignored.", - file, line, path); + verbose ("spank: %s:%d:" + "Failed to load optional plugin %s. Ignored.", + file, line, path); return (required ? 
-1 : 0); } + if (*stackp == NULL) + *stackp = list_create((ListDelF) _spank_plugin_destroy); + + verbose ("spank: %s:%d: Loaded plugin %s", + file, line, xbasename (p->fq_path)); - *plugin = p; + list_append (*stackp, p); + _spank_plugin_options_cache(p); return (0); } + static int _spank_stack_create(const char *path, List * listp) { int line; @@ -390,24 +420,8 @@ static int _spank_stack_create(const char *path, List * listp) line = 1; while (fgets(buf, sizeof(buf), fp)) { - struct spank_plugin *p; - - if (_spank_stack_process_line(path, line, buf, &p) < 0) + if (_spank_stack_process_line(path, line, buf, listp) < 0) goto fail_immediately; - - if (p == NULL) - continue; - - if (*listp == NULL) - *listp = - list_create((ListDelF) _spank_plugin_destroy); - - verbose("spank: loaded plugin %s\n", - xbasename(p->fq_path)); - list_append(*listp, p); - - _spank_plugin_options_cache(p); - line++; } @@ -423,12 +437,67 @@ static int _spank_stack_create(const char *path, List * listp) return (-1); } +static int +_spank_conf_include (const char *file, int lineno, const char *pattern, + List *stackp) +{ + int rc = 0; + glob_t gl; + size_t i; + char *copy = NULL; + + if (pattern == NULL) { + error ("%s: %d: Invalid include directive", file, lineno); + return (SLURM_ERROR); + } + + if (pattern[0] != '/') { + char *dirc = xstrdup (file); + char *dname = dirname (dirc); + + if (dname != NULL) { + xstrfmtcat (copy, "%s/%s", dname, pattern); + pattern = copy; + } + xfree (dirc); + } + + verbose ("%s: %d: include \"%s\"", file, lineno, pattern); + + rc = glob (pattern, 0, NULL, &gl); + switch (rc) { + case 0: + for (i = 0; i < gl.gl_pathc; i++) { + rc = _spank_stack_create (gl.gl_pathv[i], stackp); + if (rc < 0) + break; + } + break; + case GLOB_NOMATCH: + break; + case GLOB_NOSPACE: + errno = ENOMEM; + case GLOB_ABORTED: + verbose ("%s:%d: cannot read dir %s: %m", + file, lineno, pattern); + break; + default: + error ("Unknown glob(3) return code = %d", rc); + break; + } + + xfree (copy); + globfree (&gl); + return (rc); +} + static int _spank_handle_init(struct spank_handle *spank, void * arg, int taskid, step_fn_t fn) { memset(spank, 0, sizeof(*spank)); spank->magic = SPANK_MAGIC; + spank->plugin = NULL; spank->phase = fn; @@ -494,6 +563,8 @@ static int _do_call_stack(step_fn_t type, void * job, int taskid) while ((sp = list_next(i))) { const char *name = xbasename(sp->fq_path); + spank->plugin = sp; + switch (type) { case SPANK_INIT: if (sp->ops.init) { @@ -656,6 +727,33 @@ static int _spank_next_option_val(void) return (optval); } +static struct spank_option * _spank_option_copy(struct spank_option *opt) +{ + struct spank_option *copy = xmalloc (sizeof (*copy)); + + memset (copy, 0, sizeof (*copy)); + + copy->name = xstrdup (opt->name); + copy->has_arg = opt->has_arg; + copy->val = opt->val; + copy->cb = opt->cb; + + if (opt->arginfo) + copy->arginfo = xstrdup (opt->arginfo); + if (opt->usage) + copy->usage = xstrdup (opt->usage); + + return (copy); +} + +static void _spank_option_destroy(struct spank_option *opt) +{ + xfree (opt->name); + xfree (opt->arginfo); + xfree (opt->usage); + xfree (opt); +} + static struct spank_plugin_opt *_spank_plugin_opt_create(struct spank_plugin *p, struct @@ -663,7 +761,7 @@ static struct spank_plugin_opt *_spank_plugin_opt_create(struct int disabled) { struct spank_plugin_opt *spopt = xmalloc(sizeof(*spopt)); - spopt->opt = opt; + spopt->opt = _spank_option_copy (opt); spopt->plugin = p; spopt->optval = _spank_next_option_val(); spopt->found = 0; @@ -676,6 +774,7 @@ 
static struct spank_plugin_opt *_spank_plugin_opt_create(struct void _spank_plugin_opt_destroy(struct spank_plugin_opt *spopt) { + _spank_option_destroy (spopt->opt); xfree(spopt->optarg); xfree(spopt); } @@ -690,9 +789,56 @@ static int _opt_by_name(struct spank_plugin_opt *opt, char *optname) return (strcmp(opt->opt->name, optname) == 0); } -static int _spank_plugin_options_cache(struct spank_plugin *p) +static int +_spank_option_register(struct spank_plugin *p, struct spank_option *opt) { int disabled = 0; + struct spank_plugin_opt *spopt; + + spopt = list_find_first(option_cache, + (ListFindF) _opt_by_name, opt->name); + + if (spopt) { + struct spank_plugin *q = spopt->plugin; + info("spank: option \"%s\" provided by both %s and %s", + opt->name, xbasename(p->fq_path), + xbasename(q->fq_path)); + /* + * Disable this option, but still cache it, in case + * options are loaded in a different order on the + * remote side. + */ + disabled = 1; + } + + if ((strlen(opt->name) > SPANK_OPTION_MAXLEN)) { + error("spank: option \"%s\" provided by %s too long. Ignoring.", + opt->name, p->name); + return (ESPANK_NOSPACE); + } + + verbose("SPANK: appending plugin option \"%s\"\n", opt->name); + list_append(option_cache, _spank_plugin_opt_create(p, opt, disabled)); + + return (ESPANK_SUCCESS); +} + +spank_err_t spank_option_register(spank_t sp, struct spank_option *opt) +{ + if (sp->phase != SPANK_INIT) + return (ESPANK_BAD_ARG); + + if (!sp->plugin) + error ("Uh, oh, no current plugin!"); + + if (!opt || !opt->name || !opt->usage) + return (ESPANK_BAD_ARG); + + return (_spank_option_register(sp->plugin, opt)); +} + +static int _spank_plugin_options_cache(struct spank_plugin *p) +{ struct spank_option *opt = p->opts; if ((opt == NULL) || opt->name == NULL) @@ -703,38 +849,8 @@ static int _spank_plugin_options_cache(struct spank_plugin *p) list_create((ListDelF) _spank_plugin_opt_destroy); } - for (; opt && opt->name != NULL; opt++) { - struct spank_plugin_opt *spopt; - - spopt = - list_find_first(option_cache, (ListFindF) _opt_by_name, - opt->name); - if (spopt) { - struct spank_plugin *q = spopt->plugin; - info("spank: option \"%s\" " - "provided by both %s and %s", - opt->name, xbasename(p->fq_path), - xbasename(q->fq_path)); - /* - * Disable this option, but still cache it, in case - * options are loaded in a different order on the - * remote side. - */ - disabled = 1; - } - - if ((strlen(opt->name) > SPANK_OPTION_MAXLEN)) { - error - ("spank: option \"%s\" provided by %s too long." 
- " Ignoring.", opt->name, p->name); - continue; - } - - verbose("SPANK: appending plugin option \"%s\"\n", - opt->name); - list_append(option_cache, - _spank_plugin_opt_create(p, opt, disabled)); - } + for (; opt && opt->name != NULL; opt++) + _spank_option_register(p, opt); return (0); } diff --git a/src/common/read_config.c b/src/common/read_config.c index 06951e0f5..d683c7a3f 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -1831,6 +1831,12 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) conf->private_data |= PRIVATE_DATA_NODES; if (strstr(temp_str, "partition")) conf->private_data |= PRIVATE_DATA_PARTITIONS; + if (strstr(temp_str, "usage")) + conf->private_data |= PRIVATE_DATA_USAGE; + if (strstr(temp_str, "users")) + conf->private_data |= PRIVATE_DATA_USERS; + if (strstr(temp_str, "accounts")) + conf->private_data |= PRIVATE_DATA_ACCOUNTS; if (strstr(temp_str, "all")) conf->private_data = 0xffff; xfree(temp_str); diff --git a/src/common/slurm_accounting_storage.c b/src/common/slurm_accounting_storage.c index 568c052f2..cb10b7b2f 100644 --- a/src/common/slurm_accounting_storage.c +++ b/src/common/slurm_accounting_storage.c @@ -99,19 +99,19 @@ typedef struct slurm_acct_storage_ops { acct_association_cond_t *assoc_cond); List (*remove_qos) (void *db_conn, uint32_t uid, acct_qos_cond_t *qos_cond); - List (*get_users) (void *db_conn, + List (*get_users) (void *db_conn, uint32_t uid, acct_user_cond_t *user_cond); - List (*get_accts) (void *db_conn, + List (*get_accts) (void *db_conn, uint32_t uid, acct_account_cond_t *acct_cond); - List (*get_clusters) (void *db_conn, + List (*get_clusters) (void *db_conn, uint32_t uid, acct_cluster_cond_t *cluster_cond); - List (*get_associations) (void *db_conn, + List (*get_associations) (void *db_conn, uint32_t uid, acct_association_cond_t *assoc_cond); - List (*get_qos) (void *db_conn, + List (*get_qos) (void *db_conn, uint32_t uid, acct_qos_cond_t *qos_cond); - List (*get_txn) (void *db_conn, + List (*get_txn) (void *db_conn, uint32_t uid, acct_txn_cond_t *txn_cond); - int (*get_usage) (void *db_conn, + int (*get_usage) (void *db_conn, uint32_t uid, void *acct_assoc, time_t start, time_t end); @@ -129,7 +129,7 @@ typedef struct slurm_acct_storage_ops { int (*cluster_procs) (void *db_conn, char *cluster, uint32_t procs, time_t event_time); - int (*c_get_usage) (void *db_conn, + int (*c_get_usage) (void *db_conn, uint32_t uid, void *cluster_rec, time_t start, time_t end); int (*register_ctld) (char *cluster, uint16_t port); @@ -143,11 +143,11 @@ typedef struct slurm_acct_storage_ops { struct step_record *step_ptr); int (*job_suspend) (void *db_conn, struct job_record *job_ptr); - List (*get_jobs) (void *db_conn, + List (*get_jobs) (void *db_conn, uint32_t uid, List selected_steps, List selected_parts, void *params); - List (*get_jobs_cond) (void *db_conn, + List (*get_jobs_cond) (void *db_conn, uint32_t uid, acct_job_cond_t *job_cond); void (*job_archive) (void *db_conn, List selected_parts, void *params); @@ -2582,63 +2582,66 @@ extern List acct_storage_g_remove_qos(void *db_conn, uint32_t uid, (db_conn, uid, qos_cond); } -extern List acct_storage_g_get_users(void *db_conn, +extern List acct_storage_g_get_users(void *db_conn, uint32_t uid, acct_user_cond_t *user_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; - return (*(g_acct_storage_context->ops.get_users))(db_conn, user_cond); + return (*(g_acct_storage_context->ops.get_users)) + (db_conn, uid, user_cond); } -extern List 
acct_storage_g_get_accounts(void *db_conn, +extern List acct_storage_g_get_accounts(void *db_conn, uint32_t uid, acct_account_cond_t *acct_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_accts)) - (db_conn, acct_cond); + (db_conn, uid, acct_cond); } -extern List acct_storage_g_get_clusters(void *db_conn, +extern List acct_storage_g_get_clusters(void *db_conn, uint32_t uid, acct_cluster_cond_t *cluster_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_clusters)) - (db_conn, cluster_cond); + (db_conn, uid, cluster_cond); } -extern List acct_storage_g_get_associations(void *db_conn, +extern List acct_storage_g_get_associations(void *db_conn, uint32_t uid, acct_association_cond_t *assoc_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_associations)) - (db_conn, assoc_cond); + (db_conn, uid, assoc_cond); } -extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond) +extern List acct_storage_g_get_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; - return (*(g_acct_storage_context->ops.get_qos))(db_conn, qos_cond); + return (*(g_acct_storage_context->ops.get_qos))(db_conn, uid, qos_cond); } -extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond) +extern List acct_storage_g_get_txn(void *db_conn, uint32_t uid, + acct_txn_cond_t *txn_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; - return (*(g_acct_storage_context->ops.get_txn))(db_conn, txn_cond); + return (*(g_acct_storage_context->ops.get_txn))(db_conn, uid, txn_cond); } -extern int acct_storage_g_get_usage(void *db_conn, +extern int acct_storage_g_get_usage(void *db_conn, uint32_t uid, void *acct_assoc, time_t start, time_t end) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; return (*(g_acct_storage_context->ops.get_usage)) - (db_conn, acct_assoc, start, end); + (db_conn, uid, acct_assoc, start, end); } extern int acct_storage_g_roll_usage(void *db_conn, @@ -2686,13 +2689,13 @@ extern int clusteracct_storage_g_cluster_procs(void *db_conn, extern int clusteracct_storage_g_get_usage( - void *db_conn, void *cluster_rec, + void *db_conn, uint32_t uid, void *cluster_rec, time_t start, time_t end) { if (slurm_acct_storage_init(NULL) < 0) return SLURM_ERROR; return (*(g_acct_storage_context->ops.c_get_usage)) - (db_conn, cluster_rec, start, end); + (db_conn, uid, cluster_rec, start, end); } extern int clusteracct_storage_g_register_ctld(char *cluster, uint16_t port) @@ -2764,7 +2767,7 @@ extern int jobacct_storage_g_job_suspend (void *db_conn, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_g_get_jobs(void *db_conn, +extern List jobacct_storage_g_get_jobs(void *db_conn, uint32_t uid, List selected_steps, List selected_parts, void *params) @@ -2772,7 +2775,7 @@ extern List jobacct_storage_g_get_jobs(void *db_conn, if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_jobs)) - (db_conn, selected_steps, selected_parts, params); + (db_conn, uid, selected_steps, selected_parts, params); } /* @@ -2780,13 +2783,13 @@ extern List jobacct_storage_g_get_jobs(void *db_conn, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_g_get_jobs_cond(void *db_conn, +extern List jobacct_storage_g_get_jobs_cond(void *db_conn, uint32_t uid, 
acct_job_cond_t *job_cond) { if (slurm_acct_storage_init(NULL) < 0) return NULL; return (*(g_acct_storage_context->ops.get_jobs_cond)) - (db_conn, job_cond); + (db_conn, uid, job_cond); } /* diff --git a/src/common/slurm_accounting_storage.h b/src/common/slurm_accounting_storage.h index d0a79d1b1..e06892a12 100644 --- a/src/common/slurm_accounting_storage.h +++ b/src/common/slurm_accounting_storage.h @@ -520,7 +520,7 @@ extern List acct_storage_g_remove_qos( * returns List of acct_user_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_users(void *db_conn, +extern List acct_storage_g_get_users(void *db_conn, uint32_t uid, acct_user_cond_t *user_cond); /* @@ -530,7 +530,7 @@ extern List acct_storage_g_get_users(void *db_conn, * returns List of acct_account_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_accounts(void *db_conn, +extern List acct_storage_g_get_accounts(void *db_conn, uint32_t uid, acct_account_cond_t *acct_cond); /* @@ -541,7 +541,7 @@ extern List acct_storage_g_get_accounts(void *db_conn, * note List needs to be freed when called */ extern List acct_storage_g_get_clusters( - void *db_conn, acct_cluster_cond_t *cluster_cond); + void *db_conn, uint32_t uid, acct_cluster_cond_t *cluster_cond); /* * get info from the storage @@ -550,7 +550,7 @@ extern List acct_storage_g_get_clusters( * note List needs to be freed when called */ extern List acct_storage_g_get_associations( - void *db_conn, acct_association_cond_t *assoc_cond); + void *db_conn, uint32_t uid, acct_association_cond_t *assoc_cond); /* @@ -559,7 +559,8 @@ extern List acct_storage_g_get_associations( * RET: List of acct_qos_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond); +extern List acct_storage_g_get_qos(void *db_conn, uint32_t uid, + acct_qos_cond_t *qos_cond); /* * get info from the storage @@ -567,7 +568,8 @@ extern List acct_storage_g_get_qos(void *db_conn, acct_qos_cond_t *qos_cond); * RET: List of acct_txn_rec_t * * note List needs to be freed when called */ -extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond); +extern List acct_storage_g_get_txn(void *db_conn, uint32_t uid, + acct_txn_cond_t *txn_cond); /* * get info from the storage @@ -577,7 +579,7 @@ extern List acct_storage_g_get_txn(void *db_conn, acct_txn_cond_t *txn_cond); * RET: SLURM_SUCCESS on success SLURM_ERROR else */ extern int acct_storage_g_get_usage( - void *db_conn, void *assoc, time_t start, time_t end); + void *db_conn, uint32_t uid, void *assoc, time_t start, time_t end); /* * roll up data in the storage * IN: sent_start (option time to do a re-roll or start from this point) @@ -632,7 +634,7 @@ extern int clusteracct_storage_g_register_ctld(char *cluster, uint16_t port); * RET: SLURM_SUCCESS on success SLURM_ERROR else */ extern int clusteracct_storage_g_get_usage( - void *db_conn, void *cluster_rec, + void *db_conn, uint32_t uid, void *cluster_rec, time_t start, time_t end); /* @@ -670,7 +672,7 @@ extern int jobacct_storage_g_job_suspend (void *db_conn, * returns List of jobacct_job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_g_get_jobs(void *db_conn, +extern List jobacct_storage_g_get_jobs(void *db_conn, uint32_t uid, List selected_steps, List selected_parts, void *params); @@ -680,7 +682,7 @@ extern List jobacct_storage_g_get_jobs(void *db_conn, * returns List of jobacct_job_rec_t * * note List needs to be freed when 
called */ -extern List jobacct_storage_g_get_jobs_cond(void *db_conn, +extern List jobacct_storage_g_get_jobs_cond(void *db_conn, uint32_t uid, acct_job_cond_t *job_cond); /* diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 6000ad0c3..a2d295e31 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -315,6 +315,24 @@ char *slurm_get_plugin_dir(void) return plugin_dir; } +/* slurm_get_private_data + * get private data from slurmctld_conf object + */ +uint16_t slurm_get_private_data(void) +{ + uint16_t private_data = 0; + slurm_ctl_conf_t *conf; + + if(slurmdbd_conf) { + private_data = slurmdbd_conf->private_data; + } else { + conf = slurm_conf_lock(); + private_data = conf->private_data; + slurm_conf_unlock(); + } + return private_data; +} + /* slurm_get_state_save_location * get state_save_location from slurmctld_conf object from slurmctld_conf object * RET char * - state_save_location directory, MUST be xfreed by caller diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index c293ab2ce..566900fc3 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -169,6 +169,12 @@ uint16_t slurm_get_slurmdbd_port(void); */ char *slurm_get_plugin_dir(void); +/* slurm_get_private_data + * get private data from slurmctld_conf object + * RET uint16_t - private_data + */ +uint16_t slurm_get_private_data(void); + /* slurm_get_state_save_location * get state_save_location from slurmctld_conf object from slurmctld_conf object * RET char * - state_save_location directory, MUST be xfreed by caller diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index a5ac871f7..4a63d4277 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -721,23 +721,39 @@ private_data_string(uint16_t private_data, char *str, int str_len) { if (str_len > 0) str[0] = '\0'; - if (str_len < 22) { + if (str_len < 42) { error("private_data_string: output buffer too small"); return; } if (private_data & PRIVATE_DATA_JOBS) - strcat(str, "jobs"); + strcat(str, "jobs"); //4 len if (private_data & PRIVATE_DATA_NODES) { if (str[0]) strcat(str, ","); - strcat(str, "nodes"); + strcat(str, "nodes"); //6 len } if (private_data & PRIVATE_DATA_PARTITIONS) { if (str[0]) strcat(str, ","); - strcat(str, "partitions"); + strcat(str, "partitions"); //11 len } + if (private_data & PRIVATE_DATA_USAGE) { + if (str[0]) + strcat(str, ","); + strcat(str, "usage"); //6 len + } + if (private_data & PRIVATE_DATA_USERS) { + if (str[0]) + strcat(str, ","); + strcat(str, "users"); //6 len + } + if (private_data & PRIVATE_DATA_ACCOUNTS) { + if (str[0]) + strcat(str, ","); + strcat(str, "accounts"); //9 len + } + // total len 42 } char *job_state_string(enum job_states inx) diff --git a/src/common/slurm_protocol_socket_implementation.c b/src/common/slurm_protocol_socket_implementation.c index adb19f2ce..d16960ab4 100644 --- a/src/common/slurm_protocol_socket_implementation.c +++ b/src/common/slurm_protocol_socket_implementation.c @@ -494,6 +494,8 @@ slurm_fd _slurm_open_stream(slurm_addr *addr, bool retry) { int retry_cnt; slurm_fd fd; + uint16_t port; + char ip[32]; if ( (addr->sin_family == 0) || (addr->sin_port == 0) ) { error("Error connecting, bad data: family = %u, port = %u", @@ -510,12 +512,15 @@ slurm_fd _slurm_open_stream(slurm_addr *addr, bool retry) } if (retry_cnt) { - if (retry_cnt == 1) - debug3("Error connecting, picking new stream port"); + if (retry_cnt == 1) { + 
debug3("Error connecting, " + "picking new stream port"); + } _sock_bind_wild(fd); } - rc = _slurm_connect(fd, (struct sockaddr const *)addr, sizeof(*addr)); + rc = _slurm_connect(fd, (struct sockaddr const *)addr, + sizeof(*addr)); if (rc >= 0) /* success */ break; if ((errno != ECONNREFUSED) || @@ -531,7 +536,9 @@ slurm_fd _slurm_open_stream(slurm_addr *addr, bool retry) return fd; error: - debug2("Error connecting slurm stream socket: %m"); + slurm_get_ip_str(addr, &port, ip, sizeof(ip)); + debug2("Error connecting slurm stream socket at %s:%d: %m", + ip, ntohs(port)); if ((_slurm_close_stream(fd) < 0) && (errno == EINTR)) _slurm_close_stream(fd); /* try again */ return SLURM_SOCKET_ERROR; diff --git a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c index fff6191be..a37ad84ee 100644 --- a/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c +++ b/src/plugins/accounting_storage/filetxt/accounting_storage_filetxt.c @@ -356,43 +356,43 @@ extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, return NULL; } -extern List acct_storage_p_get_users(void *db_conn, +extern List acct_storage_p_get_users(void *db_conn, uid_t uid, acct_user_cond_t *user_q) { return NULL; } -extern List acct_storage_p_get_accts(void *db_conn, +extern List acct_storage_p_get_accts(void *db_conn, uid_t uid, acct_account_cond_t *acct_q) { return NULL; } -extern List acct_storage_p_get_clusters(void *db_conn, +extern List acct_storage_p_get_clusters(void *db_conn, uid_t uid, acct_account_cond_t *cluster_q) { return NULL; } -extern List acct_storage_p_get_associations(void *db_conn, +extern List acct_storage_p_get_associations(void *db_conn, uid_t uid, acct_association_cond_t *assoc_q) { return NULL; } -extern List acct_storage_p_get_qos(void *db_conn, +extern List acct_storage_p_get_qos(void *db_conn, uid_t uid, acct_qos_cond_t *qos_cond) { return NULL; } -extern List acct_storage_p_get_txn(void *db_conn, +extern List acct_storage_p_get_txn(void *db_conn, uid_t uid, acct_txn_cond_t *txn_cond) { return NULL; } -extern int acct_storage_p_get_usage(void *db_conn, +extern int acct_storage_p_get_usage(void *db_conn, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end) { @@ -439,7 +439,7 @@ extern int clusteracct_storage_p_cluster_procs(void *db_conn, } extern int clusteracct_storage_p_get_usage( - void *db_conn, + void *db_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end) { @@ -832,7 +832,7 @@ extern int jobacct_storage_p_suspend(void *db_conn, * in/out job_list List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs(void *db_conn, +extern List jobacct_storage_p_get_jobs(void *db_conn, uid_t uid, List selected_steps, List selected_parts, sacct_parameters_t *params) @@ -873,7 +873,7 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, * returns List of jobacct_job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs_cond(void *db_conn, +extern List jobacct_storage_p_get_jobs_cond(void *db_conn, uid_t uid, acct_job_cond_t *job_cond) { return filetxt_jobacct_process_get_jobs(job_cond); diff --git a/src/plugins/accounting_storage/gold/accounting_storage_gold.c b/src/plugins/accounting_storage/gold/accounting_storage_gold.c index 2d5a9db44..d7349ecc4 100644 --- a/src/plugins/accounting_storage/gold/accounting_storage_gold.c +++ 
b/src/plugins/accounting_storage/gold/accounting_storage_gold.c @@ -1041,82 +1041,6 @@ extern int acct_storage_p_add_associations(void *db_conn, return rc; } -extern int acct_storage_p_get_assoc_id(void *db_conn, - acct_association_rec_t *assoc) -{ - ListIterator itr = NULL; - acct_association_rec_t * found_assoc = NULL; - acct_association_rec_t * ret_assoc = NULL; - - if(!local_association_list) - local_association_list = acct_storage_g_get_associations(NULL, - NULL); - - if((!assoc->cluster && !assoc->acct) && !assoc->id) { - error("acct_storage_p_get_assoc_id: " - "You need to supply a cluster and account name to get " - "an association."); - return SLURM_ERROR; - } - - itr = list_iterator_create(local_association_list); - while((found_assoc = list_next(itr))) { - if(assoc->id) { - if(assoc->id == found_assoc->id) { - ret_assoc = found_assoc; - break; - } - continue; - } else { - if((!found_assoc->acct - || strcasecmp(assoc->acct, - found_assoc->acct)) - || (!assoc->cluster - || strcasecmp(assoc->cluster, - found_assoc->cluster)) - || (assoc->user - && (!found_assoc->user - || strcasecmp(assoc->user, - found_assoc->user))) - || (!assoc->user && found_assoc->user - && strcasecmp("none", - found_assoc->user))) - continue; - if(assoc->partition - && (!assoc->partition - || strcasecmp(assoc->partition, - found_assoc->partition))) { - ret_assoc = found_assoc; - continue; - } - } - ret_assoc = found_assoc; - break; - } - list_iterator_destroy(itr); - - if(!ret_assoc) - return SLURM_ERROR; - - assoc->id = ret_assoc->id; - if(!assoc->user) - assoc->user = ret_assoc->user; - if(!assoc->acct) - assoc->acct = ret_assoc->acct; - if(!assoc->cluster) - assoc->cluster = ret_assoc->cluster; - if(!assoc->partition) - assoc->partition = ret_assoc->partition; - - return SLURM_SUCCESS; -} - -extern int acct_storage_p_validate_assoc_id(void *db_conn, - uint32_t assoc_id) -{ - return SLURM_SUCCESS; -} - extern int acct_storage_p_add_qos(void *db_conn, uint32_t uid, List qos_list) { @@ -2085,7 +2009,7 @@ extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, return NULL; } -extern List acct_storage_p_get_users(void *db_conn, +extern List acct_storage_p_get_users(void *db_conn, uid_t uid, acct_user_cond_t *user_q) { gold_request_t *gold_request = NULL; @@ -2177,7 +2101,7 @@ empty: return user_list; } -extern List acct_storage_p_get_accts(void *db_conn, +extern List acct_storage_p_get_accts(void *db_conn, uid_t uid, acct_account_cond_t *acct_q) { gold_request_t *gold_request = NULL; @@ -2286,7 +2210,7 @@ empty: return acct_list; } -extern List acct_storage_p_get_clusters(void *db_conn, +extern List acct_storage_p_get_clusters(void *db_conn, uid_t uid, acct_cluster_cond_t *cluster_q) { gold_request_t *gold_request = NULL; @@ -2349,7 +2273,7 @@ empty: return cluster_list; } -extern List acct_storage_p_get_associations(void *db_conn, +extern List acct_storage_p_get_associations(void *db_conn, uid_t uid, acct_association_cond_t *assoc_q) { @@ -2461,19 +2385,19 @@ empty: return association_list; } -extern List acct_storage_p_get_qos(void *db_conn, +extern List acct_storage_p_get_qos(void *db_conn, uid_t uid, acct_qos_cond_t *qos_cond) { return NULL; } -extern List acct_storage_p_get_txn(void *db_conn, +extern List acct_storage_p_get_txn(void *db_conn, uid_t uid, acct_txn_cond_t *txn_cond) { return NULL; } -extern int acct_storage_p_get_usage(void *db_conn, +extern int acct_storage_p_get_usage(void *db_conn, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end) { @@ -2929,7 +2853,7 @@ 
extern int clusteracct_storage_p_cluster_procs(void *db_conn, } extern int clusteracct_storage_p_get_usage( - void *db_conn, + void *db_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end) { @@ -3081,7 +3005,7 @@ extern int jobacct_storage_p_suspend(void *db_conn, * returns List of jobacct_job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs(void *db_conn, +extern List jobacct_storage_p_get_jobs(void *db_conn, uid_t uid, List selected_steps, List selected_parts, sacct_parameters_t *params) @@ -3296,7 +3220,7 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, * returns List of jobacct_job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs_cond(void *db_conn, +extern List jobacct_storage_p_get_jobs_cond(void *db_conn, uid_t uid, void *job_cond) { info("not implemented"); diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index af96b714e..b3f3a6693 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -114,6 +114,8 @@ char *user_table = "user_table"; char *last_ran_table = "last_ran_table"; char *suspend_table = "suspend_table"; +static int normal_qos_id = NO_VAL; + extern int acct_storage_p_commit(mysql_conn_t *mysql_conn, bool commit); extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, @@ -121,15 +123,15 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, List association_list); extern List acct_storage_p_get_associations( - mysql_conn_t *mysql_conn, + mysql_conn_t *mysql_conn, uid_t uid, acct_association_cond_t *assoc_cond); -extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, +extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end); extern int clusteracct_storage_p_get_usage( - mysql_conn_t *mysql_conn, + mysql_conn_t *mysql_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end); /* This should be added to the beginning of each function to make sure @@ -303,7 +305,8 @@ static int _move_account(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, * should work either way in the tree. (i.e. move child to be parent * of current parent, and parent to be child of child.) 
*/ -static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, +static int _move_parent(mysql_conn_t *mysql_conn, uid_t uid, + uint32_t lft, uint32_t rgt, char *cluster, char *id, char *old_parent, char *new_parent) { @@ -374,7 +377,7 @@ static int _move_parent(mysql_conn_t *mysql_conn, uint32_t lft, uint32_t rgt, /* now we need to send the update of the new parents and * limits, so just to be safe, send the whole tree */ - assoc_list = acct_storage_p_get_associations(mysql_conn, NULL); + assoc_list = acct_storage_p_get_associations(mysql_conn, uid, NULL); /* NOTE: you can not use list_pop, or list_push anywhere either, since mysql is exporting something of the same type as a macro, @@ -866,15 +869,9 @@ static int _remove_common(mysql_conn_t *mysql_conn, return rc; } - if(!has_jobs) - query = xstrdup_printf( - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);", - assoc_day_table, day_old, loc_assoc_char, - assoc_hour_table, day_old, loc_assoc_char, - assoc_month_table, day_old, loc_assoc_char); - + /* We should not have to delete from usage table, only flag since we + * only delete things that are typos. + */ xstrfmtcat(query, "update %s set mod_time=%d, deleted=1 where (%s);" "update %s set mod_time=%d, deleted=1 where (%s);" @@ -1158,21 +1155,21 @@ static int _get_user_coords(mysql_conn_t *mysql_conn, acct_user_rec_t *user) } /* Used in job functions for getting the database index based off the - * submit time, job and assoc id. + * submit time, job and assoc id. 0 is returned if none is found */ static int _get_db_index(MYSQL *db_conn, time_t submit, uint32_t jobid, uint32_t associd) { MYSQL_RES *result = NULL; MYSQL_ROW row; - int db_index = -1; + int db_index = 0; char *query = xstrdup_printf("select id from %s where " "submit=%d and jobid=%u and associd=%u", job_table, (int)submit, jobid, associd); if(!(result = mysql_db_query_ret(db_conn, query, 0))) { xfree(query); - return -1; + return 0; } xfree(query); @@ -1182,7 +1179,7 @@ static int _get_db_index(MYSQL *db_conn, error("We can't get a db_index for this combo, " "submit=%d and jobid=%u and associd=%u.", (int)submit, jobid, associd); - return -1; + return 0; } db_index = atoi(row[0]); mysql_free_result(result); @@ -1222,7 +1219,7 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) { "name", "tinytext not null" }, { "description", "text not null" }, { "organization", "text not null" }, - { "qos", "blob" }, + { "qos", "blob not null default ''" }, { NULL, NULL} }; @@ -1394,7 +1391,7 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) { "deleted", "tinyint default 0" }, { "name", "tinytext not null" }, { "default_acct", "tinytext not null" }, - { "qos", "blob" }, + { "qos", "blob not null default ''" }, { "admin_level", "smallint default 1 not null" }, { NULL, NULL} }; @@ -1533,10 +1530,11 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) "insert into %s " "(creation_time, mod_time, name, description) " "values (%d, %d, 'normal', 'Normal QOS default') " - "on duplicate key update deleted=0;", + "on duplicate key update id=LAST_INSERT_ID(id), " + "deleted=0;", qos_table, now, now); debug3("%s", query); - mysql_db_query(db_conn, query); + normal_qos_id = mysql_insert_ret_id(db_conn, query); xfree(query); } if(mysql_db_create_table(db_conn, step_table, @@ -1871,18 +1869,12 @@ extern int acct_storage_p_add_users(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(vals, ", '%s'", qos_val); xstrfmtcat(extra, ", 
qos='%s'", qos_val); + } else if(normal_qos_id != NO_VAL) { + /* Add normal qos to the user */ + xstrcat(cols, ", qos"); + xstrfmtcat(vals, ", ',%d'", normal_qos_id); + xstrfmtcat(extra, ", qos=',%d'", normal_qos_id); } - /* Since I don't really want to go find out which id - * normal is we are not going to add it at all which - * isn't a big deal since if the list is blank the user - * will get it be default - */ - /* else { */ -/* /\* Add normal qos to the user *\/ */ -/* xstrcat(cols, ", qos"); */ -/* xstrfmtcat(vals, ", ',0'"); */ -/* xstrfmtcat(extra, ", qos=',0'"); */ -/* } */ if(object->admin_level != ACCT_ADMIN_NOTSET) { xstrcat(cols, ", admin_level"); @@ -2111,6 +2103,11 @@ extern int acct_storage_p_add_accts(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(vals, ", '%s'", qos_val); xstrfmtcat(extra, ", qos='%s'", qos_val); + } else if(normal_qos_id != NO_VAL) { + /* Add normal qos to the account */ + xstrcat(cols, ", qos"); + xstrfmtcat(vals, ", ',%d'", normal_qos_id); + xstrfmtcat(extra, ", qos=',%d'", normal_qos_id); } query = xstrdup_printf( @@ -2209,16 +2206,6 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, return SLURM_ERROR; assoc_list = list_create(destroy_acct_association_rec); - assoc = xmalloc(sizeof(acct_association_rec_t)); - list_append(assoc_list, assoc); - - assoc->user = xstrdup("root"); - assoc->acct = xstrdup("root"); - assoc->fairshare = NO_VAL; - assoc->max_cpu_secs_per_job = NO_VAL; - assoc->max_jobs = NO_VAL; - assoc->max_nodes_per_job = NO_VAL; - assoc->max_wall_duration_per_job = NO_VAL; user_name = uid_to_string((uid_t) uid); itr = list_iterator_create(cluster_list); @@ -2367,10 +2354,21 @@ extern int acct_storage_p_add_clusters(mysql_conn_t *mysql_conn, uint32_t uid, added++; /* Add user root by default to run from the root - * association + * association. This gets popped off so we need to + * readd it every time here. */ - xfree(assoc->cluster); + assoc = xmalloc(sizeof(acct_association_rec_t)); + list_append(assoc_list, assoc); + assoc->cluster = xstrdup(object->name); + assoc->user = xstrdup("root"); + assoc->acct = xstrdup("root"); + assoc->fairshare = NO_VAL; + assoc->max_cpu_secs_per_job = NO_VAL; + assoc->max_jobs = NO_VAL; + assoc->max_nodes_per_job = NO_VAL; + assoc->max_wall_duration_per_job = NO_VAL; + if(acct_storage_p_add_associations(mysql_conn, uid, assoc_list) == SLURM_ERROR) { error("Problem adding root user association"); @@ -2690,7 +2688,7 @@ extern int acct_storage_p_add_associations(mysql_conn_t *mysql_conn, row[MASSOC_PACCT])) { /* We need to move the parent! 
*/ - if(_move_parent(mysql_conn, + if(_move_parent(mysql_conn, uid, atoi(row[MASSOC_LFT]), atoi(row[MASSOC_RGT]), object->cluster, @@ -2993,8 +2991,8 @@ extern List acct_storage_p_modify_users(mysql_conn_t *mysql_conn, uint32_t uid, object+1); } else if(object[0] == '+') { xstrfmtcat(vals, - ", qos=concat(" - "replace(qos, ',%s', ''), ',%s')", + ", qos=concat_ws(',', " + "replace(qos, ',%s', ''), '%s')", object+1, object+1); } else { xstrfmtcat(tmp_qos, ",%s", object); @@ -3233,8 +3231,8 @@ extern List acct_storage_p_modify_accounts( object+1); } else if(object[0] == '+') { xstrfmtcat(vals, - ", qos=concat(" - "replace(qos, ',%s', ''), ',%s')", + ", qos=concat_ws(',', " + "replace(qos, ',%s', ''), '%s')", object+1, object+1); } else { xstrfmtcat(tmp_qos, ",%s", object); @@ -3761,7 +3759,7 @@ extern List acct_storage_p_modify_associations( continue; } - if(_move_parent(mysql_conn, + if(_move_parent(mysql_conn, uid, atoi(row[MASSOC_LFT]), atoi(row[MASSOC_RGT]), row[MASSOC_CLUSTER], @@ -4363,7 +4361,6 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, int set = 0; MYSQL_RES *result = NULL; MYSQL_ROW row; - int day_old = now - DELETE_SEC_BACK; if(!cluster_cond) { error("we need something to change"); @@ -4427,13 +4424,8 @@ extern List acct_storage_p_remove_clusters(mysql_conn_t *mysql_conn, } xfree(query); - /* if this is a cluster update the machine usage tables as well */ - query = xstrdup_printf("delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);" - "delete from %s where creation_time>%d && (%s);", - cluster_day_table, day_old, assoc_char, - cluster_hour_table, day_old, assoc_char, - cluster_month_table, day_old, assoc_char); + /* We should not need to delete any cluster usage just set it + * to deleted */ xstrfmtcat(query, "update %s set mod_time=%d, deleted=1 where (%s);" "update %s set mod_time=%d, deleted=1 where (%s);" @@ -4927,7 +4919,7 @@ extern List acct_storage_p_remove_qos(mysql_conn_t *mysql_conn, uint32_t uid, #endif } -extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, +extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, uid_t uid, acct_user_cond_t *user_cond) { #ifdef HAVE_MYSQL @@ -4938,9 +4930,11 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, ListIterator itr = NULL; char *object = NULL; int set = 0; - int i=0; + int i=0, is_admin=1; MYSQL_RES *result = NULL; MYSQL_ROW row; + uint16_t private_data = 0; + acct_user_rec_t user; /* if this changes you will need to edit the corresponding enum */ char *user_req_inx[] = { @@ -4960,7 +4954,32 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; + memset(&user, 0, sizeof(acct_user_rec_t)); + user.uid = uid; + private_data = slurm_get_private_data(); + if (private_data & PRIVATE_DATA_USERS) { + /* This only works when running though the slurmdbd. + * THERE IS NO AUTHENTICATION WHEN RUNNNING OUT OF THE + * SLURMDBD! + */ + if(slurmdbd_conf) { + is_admin = 0; + /* we have to check the authentication here in the + * plugin since we don't know what accounts are being + * referenced until after the query. Here we will + * set if they are an operator or greater and then + * check it below after the query. 
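
The switch from concat() to concat_ws() in the "+=" branch above is a NULL-safety fix worth spelling out: CONCAT() returns NULL as soon as any argument is NULL, so adding a QOS to a row whose qos column had never been set silently nulled the whole column, while CONCAT_WS() skips NULL arguments. Worked values, assuming the comma-prefixed storage described earlier:

        /*
         *   old:  concat(replace(NULL, ',5', ''), ',5')        -> NULL  (lost)
         *   new:  concat_ws(',', replace(NULL, ',5', ''), '5') -> '5'
         *   new:  concat_ws(',', ',1,3', '5')                  -> ',1,3,5'
         *
         * The inner replace() strips the value first, so repeating "+=" on
         * the same QOS stays idempotent.  The "blob not null default ''"
         * schema change earlier in this patch attacks the same problem from
         * the other side.
         */
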
+ */ + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) + || assoc_mgr_get_admin_level(mysql_conn, uid) + >= ACCT_ADMIN_OPERATOR) + is_admin = 1; + else { + assoc_mgr_fill_in_user(mysql_conn, &user, 1); + } + } + } if(!user_cond) { xstrcat(extra, "where deleted=0"); @@ -5024,6 +5043,12 @@ extern List acct_storage_p_get_users(mysql_conn_t *mysql_conn, user_cond->admin_level); } empty: + /* This is here to make sure we are looking at only this user + * if this flag is set. + */ + if(!is_admin && (private_data & PRIVATE_DATA_USERS)) { + xstrfmtcat(extra, " && name='%s'", user.name); + } xfree(tmp); xstrfmtcat(tmp, "%s", user_req_inx[i]); @@ -5086,7 +5111,7 @@ empty: assoc_cond->user_list = list_create(NULL); list_append(assoc_cond->user_list, user->name); user->assoc_list = acct_storage_p_get_associations( - mysql_conn, assoc_cond); + mysql_conn, uid, assoc_cond); list_destroy(assoc_cond->user_list); assoc_cond->user_list = NULL; } @@ -5099,7 +5124,7 @@ empty: #endif } -extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, +extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, uid_t uid, acct_account_cond_t *acct_cond) { #ifdef HAVE_MYSQL @@ -5110,9 +5135,11 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, ListIterator itr = NULL; char *object = NULL; int set = 0; - int i=0; + int i=0, is_admin=1; MYSQL_RES *result = NULL; MYSQL_ROW row; + uint16_t private_data = 0; + acct_user_rec_t user; /* if this changes you will need to edit the corresponding enum */ char *acct_req_inx[] = { @@ -5132,6 +5159,39 @@ extern List acct_storage_p_get_accts(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; + memset(&user, 0, sizeof(acct_user_rec_t)); + user.uid = uid; + + private_data = slurm_get_private_data(); + + if (private_data & PRIVATE_DATA_ACCOUNTS) { + /* This only works when running though the slurmdbd. + * THERE IS NO AUTHENTICATION WHEN RUNNNING OUT OF THE + * SLURMDBD! + */ + if(slurmdbd_conf) { + is_admin = 0; + /* we have to check the authentication here in the + * plugin since we don't know what accounts are being + * referenced until after the query. Here we will + * set if they are an operator or greater and then + * check it below after the query. + */ + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) + || assoc_mgr_get_admin_level(mysql_conn, uid) + >= ACCT_ADMIN_OPERATOR) + is_admin = 1; + else { + assoc_mgr_fill_in_user(mysql_conn, &user, 1); + } + + if(!is_admin && (!user.coord_accts + || !list_count(user.coord_accts))) { + errno = ESLURM_ACCESS_DENIED; + return NULL; + } + } + } if(!acct_cond) { xstrcat(extra, "where deleted=0"); @@ -5213,6 +5273,27 @@ empty: xstrfmtcat(tmp, ", %s", acct_req_inx[i]); } + /* This is here to make sure we are looking at only this user + * if this flag is set. We also include any accounts they may be + * coordinator of. 
+ */ + if(!is_admin && (private_data & PRIVATE_DATA_ACCOUNTS)) { + acct_coord_rec_t *coord = NULL; + set = 0; + itr = list_iterator_create(user.coord_accts); + while((coord = list_next(itr))) { + if(set) { + xstrfmtcat(extra, " || name='%s'", coord->name); + } else { + set = 1; + xstrfmtcat(extra, " && (name='%s'",coord->name); + } + } + list_iterator_destroy(itr); + if(set) + xstrcat(extra,")"); + } + query = xstrdup_printf("select %s from %s %s", tmp, acct_table, extra); xfree(tmp); xfree(extra); @@ -5257,7 +5338,7 @@ empty: assoc_cond->acct_list = list_create(NULL); list_append(assoc_cond->acct_list, acct->name); acct->assoc_list = acct_storage_p_get_associations( - mysql_conn, assoc_cond); + mysql_conn, uid, assoc_cond); list_destroy(assoc_cond->acct_list); assoc_cond->acct_list = NULL; } @@ -5271,7 +5352,7 @@ empty: #endif } -extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn, +extern List acct_storage_p_get_clusters(mysql_conn_t *mysql_conn, uid_t uid, acct_cluster_cond_t *cluster_cond) { #ifdef HAVE_MYSQL @@ -5384,7 +5465,7 @@ empty: /* get the usage if requested */ if(cluster_cond->with_usage) { clusteracct_storage_p_get_usage( - mysql_conn, cluster, + mysql_conn, uid, cluster, cluster_cond->usage_start, cluster_cond->usage_end); } @@ -5440,7 +5521,8 @@ empty: #endif } -extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, +extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, + uid_t uid, acct_association_cond_t *assoc_cond) { #ifdef HAVE_MYSQL @@ -5451,7 +5533,7 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, ListIterator itr = NULL; char *object = NULL; int set = 0; - int i=0; + int i=0, is_admin=1; MYSQL_RES *result = NULL; MYSQL_ROW row; int parent_mj = INFINITE; @@ -5464,6 +5546,8 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, char *last_cluster2 = NULL; uint32_t user_parent_id = 0; uint32_t acct_parent_id = 0; + uint16_t private_data = 0; + acct_user_rec_t user; /* needed if we don't have an assoc_cond */ uint16_t without_parent_info = 0; @@ -5510,13 +5594,39 @@ extern List acct_storage_p_get_associations(mysql_conn_t *mysql_conn, ASSOC2_REQ_MCPJ }; + if(!assoc_cond) { + xstrcat(extra, "where deleted=0"); + goto empty; + } + if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; + memset(&user, 0, sizeof(acct_user_rec_t)); + user.uid = uid; - if(!assoc_cond) { - xstrcat(extra, "where deleted=0"); - goto empty; + private_data = slurm_get_private_data(); + if (private_data & PRIVATE_DATA_USERS) { + /* This only works when running though the slurmdbd. + * THERE IS NO AUTHENTICATION WHEN RUNNNING OUT OF THE + * SLURMDBD! + */ + if(slurmdbd_conf) { + is_admin = 0; + /* we have to check the authentication here in the + * plugin since we don't know what accounts are being + * referenced until after the query. Here we will + * set if they are an operator or greater and then + * check it below after the query. + */ + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) + || assoc_mgr_get_admin_level(mysql_conn, uid) + >= ACCT_ADMIN_OPERATOR) + is_admin = 1; + else { + assoc_mgr_fill_in_user(mysql_conn, &user, 1); + } + } } if(assoc_cond->with_deleted) @@ -5604,7 +5714,49 @@ empty: for(i=1; i<ASSOC_REQ_COUNT; i++) { xstrfmtcat(tmp, ", %s", assoc_req_inx[i]); } - + + /* this is here to make sure we are looking at only this user + * if this flag is set. We also include any accounts they may be + * coordinator of. 
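
The coordinator filter just added uses a first-iteration flag to build a parenthesized OR group, a shape that recurs several times in this patch. Reduced to a skeleton (a sketch only; the helper name is hypothetical and the real code inlines the loop, using the codebase's list and xstring helpers):

        /* Append " && (name='a' || name='b')" to an xmalloc'd where clause. */
        static void _append_coord_filter(char **extra, List coord_accts)
        {
                acct_coord_rec_t *coord = NULL;
                ListIterator itr = list_iterator_create(coord_accts);
                int set = 0;

                while ((coord = list_next(itr))) {
                        if (set)
                                xstrfmtcat(extra, " || name='%s'", coord->name);
                        else {
                                set = 1;
                                xstrfmtcat(extra, " && (name='%s'", coord->name);
                        }
                }
                list_iterator_destroy(itr);
                if (set)
                        xstrcat(extra, ")");
        }
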
+ */ + if(!is_admin && (private_data & PRIVATE_DATA_USERS)) { + query = xstrdup_printf("select lft from %s where user='%s'", + assoc_table, user.name); + if(user.coord_accts) { + acct_coord_rec_t *coord = NULL; + itr = list_iterator_create(user.coord_accts); + while((coord = list_next(itr))) { + xstrfmtcat(query, " || acct='%s'", + coord->name); + } + list_iterator_destroy(itr); + } + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(extra); + xfree(query); + return NULL; + } + xfree(query); + set = 0; + while((row = mysql_fetch_row(result))) { + if(set) { + xstrfmtcat(extra, + " || (%s between lft and rgt)", + row[0]); + } else { + set = 1; + xstrfmtcat(extra, + " && ((%s between lft and rgt)", + row[0]); + } + } + if(set) + xstrcat(extra,")"); + mysql_free_result(result); + } + query = xstrdup_printf("select %s from %s %s order by lft;", tmp, assoc_table, extra); xfree(tmp); @@ -5618,31 +5770,31 @@ empty: xfree(query); assoc_list = list_create(destroy_acct_association_rec); - + while((row = mysql_fetch_row(result))) { acct_association_rec_t *assoc = xmalloc(sizeof(acct_association_rec_t)); MYSQL_RES *result2 = NULL; MYSQL_ROW row2; - + list_append(assoc_list, assoc); assoc->id = atoi(row[ASSOC_REQ_ID]); assoc->lft = atoi(row[ASSOC_REQ_LFT]); assoc->rgt = atoi(row[ASSOC_REQ_RGT]); - + + if(row[ASSOC_REQ_USER][0]) + assoc->user = xstrdup(row[ASSOC_REQ_USER]); + assoc->acct = xstrdup(row[ASSOC_REQ_ACCT]); + assoc->cluster = xstrdup(row[ASSOC_REQ_CLUSTER]); + /* get the usage if requested */ if(with_usage) { - acct_storage_p_get_usage(mysql_conn, assoc, + acct_storage_p_get_usage(mysql_conn, uid, assoc, assoc_cond->usage_start, assoc_cond->usage_end); } - if(row[ASSOC_REQ_USER][0]) - assoc->user = xstrdup(row[ASSOC_REQ_USER]); - assoc->acct = xstrdup(row[ASSOC_REQ_ACCT]); - assoc->cluster = xstrdup(row[ASSOC_REQ_CLUSTER]); - if(!without_parent_info && row[ASSOC_REQ_PARENT][0]) { /* info("got %s?=%s and %s?=%s", */ @@ -5767,7 +5919,7 @@ empty: #endif } -extern List acct_storage_p_get_qos(mysql_conn_t *mysql_conn, +extern List acct_storage_p_get_qos(mysql_conn_t *mysql_conn, uid_t uid, acct_qos_cond_t *qos_cond) { #ifdef HAVE_MYSQL @@ -5894,7 +6046,7 @@ empty: #endif } -extern List acct_storage_p_get_txn(mysql_conn_t *mysql_conn, +extern List acct_storage_p_get_txn(mysql_conn_t *mysql_conn, uid_t uid, acct_txn_cond_t *txn_cond) { #ifdef HAVE_MYSQL @@ -6063,13 +6215,13 @@ empty: #endif } -extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, +extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end) { #ifdef HAVE_MYSQL int rc = SLURM_SUCCESS; - int i=0; + int i=0, is_admin=1; MYSQL_RES *result = NULL; MYSQL_ROW row; char *tmp = NULL; @@ -6078,6 +6230,8 @@ extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, struct tm start_tm; struct tm end_tm; char *query = NULL; + uint16_t private_data = 0; + acct_user_rec_t user; char *assoc_req_inx[] = { "t1.id", @@ -6097,6 +6251,72 @@ extern int acct_storage_p_get_usage(mysql_conn_t *mysql_conn, return SLURM_ERROR; } + if(_check_connection(mysql_conn) != SLURM_SUCCESS) + return SLURM_ERROR; + + memset(&user, 0, sizeof(acct_user_rec_t)); + user.uid = uid; + + private_data = slurm_get_private_data(); + if (private_data & PRIVATE_DATA_USAGE) { + /* This only works when running though the slurmdbd. + * THERE IS NO AUTHENTICATION WHEN RUNNNING OUT OF THE + * SLURMDBD! 
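
The same authorization preamble now opens acct_storage_p_get_users(), _get_accts(), _get_associations(), and _get_usage(). Its logic collapses to one decision; a consolidating sketch (the helper name is mine, the real code repeats the block inline each time, and the caller pre-fills user.uid):

        /* Return 1 if uid may see everything guarded by "flag"; otherwise
         * fill in "user" and return 0 so the query gets restricted to that
         * user.  Outside the slurmdbd there is no authentication, so
         * everyone passes. */
        static int _private_data_gate(mysql_conn_t *mysql_conn, uid_t uid,
                                      uint16_t flag, acct_user_rec_t *user)
        {
                if (!(slurm_get_private_data() & flag))
                        return 1;
                if (!slurmdbd_conf)
                        return 1;
                if ((uid == slurmdbd_conf->slurm_user_id) || (uid == 0)
                    || (assoc_mgr_get_admin_level(mysql_conn, uid)
                        >= ACCT_ADMIN_OPERATOR))
                        return 1;
                assoc_mgr_fill_in_user(mysql_conn, user, 1);
                return 0;
        }
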
+ */ + if(slurmdbd_conf) { + is_admin = 0; + /* we have to check the authentication here in the + * plugin since we don't know what accounts are being + * referenced until after the query. Here we will + * set if they are an operator or greater and then + * check it below after the query. + */ + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) + || assoc_mgr_get_admin_level(mysql_conn, uid) + >= ACCT_ADMIN_OPERATOR) + is_admin = 1; + else { + assoc_mgr_fill_in_user(mysql_conn, &user, 1); + } + + if(!is_admin) { + ListIterator itr = NULL; + acct_coord_rec_t *coord = NULL; + + if(acct_assoc->user && + !strcmp(acct_assoc->user, user.name)) + goto is_user; + + if(!user.coord_accts) { + debug4("This user isn't a coord."); + goto bad_user; + } + + if(!acct_assoc->acct) { + debug("No account name given " + "in association."); + goto bad_user; + } + + itr = list_iterator_create(user.coord_accts); + while((coord = list_next(itr))) { + if(!strcasecmp(coord->name, + acct_assoc->acct)) + break; + } + list_iterator_destroy(itr); + + if(coord) + goto is_user; + + bad_user: + errno = ESLURM_ACCESS_DENIED; + return SLURM_ERROR; + } + } + } +is_user: + /* Default is going to be the last day */ if(!end) { if(!localtime_r(&my_time, &end_tm)) { @@ -6592,7 +6812,7 @@ end_it: } extern int clusteracct_storage_p_get_usage( - mysql_conn_t *mysql_conn, + mysql_conn_t *mysql_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end) { #ifdef HAVE_MYSQL @@ -6806,27 +7026,41 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, if(!job_ptr->db_index) { query = xstrdup_printf( "insert into %s " - "(jobid, account, associd, uid, gid, partition, " - "blockid, eligible, submit, start, name, track_steps, " - "state, priority, req_cpus, alloc_cpus, nodelist) " - "values (%u, '%s', %u, %u, %u, '%s', '%s', " - "%d, %d, %d, '%s', %u, " - "%u, %u, %u, %u, '%s') " - "on duplicate key update id=LAST_INSERT_ID(id), " - "end=0, state=%u", - job_table, job_ptr->job_id, job_ptr->account, - job_ptr->assoc_id, - job_ptr->user_id, job_ptr->group_id, - job_ptr->partition, block_id, - (int)job_ptr->details->begin_time, - (int)job_ptr->details->submit_time, - (int)job_ptr->start_time, - jname, track_steps, - job_ptr->job_state & (~JOB_COMPLETING), - priority, job_ptr->num_procs, - job_ptr->total_procs, nodes, - job_ptr->job_state & (~JOB_COMPLETING)); + "(jobid, account, associd, uid, gid, partition, ", + job_table); + if(block_id) + xstrcat(query, "blockid, "); + + xstrfmtcat(query, + "eligible, submit, start, name, track_steps, " + "state, priority, req_cpus, alloc_cpus, nodelist) " + "values (%u, '%s', %u, %u, %u, '%s', ", + job_ptr->job_id, job_ptr->account, + job_ptr->assoc_id, + job_ptr->user_id, job_ptr->group_id, + job_ptr->partition); + + if(block_id) + xstrfmtcat(query, "'%s', ", block_id); + + xstrfmtcat(query, + "%d, %d, %d, '%s', %u, %u, %u, %u, %u, '%s') " + "on duplicate key update " + "id=LAST_INSERT_ID(id), end=0, state=%u, " + "partition ='%s', account='%s', associd=%u", + (int)job_ptr->details->begin_time, + (int)job_ptr->details->submit_time, + (int)job_ptr->start_time, + jname, track_steps, + job_ptr->job_state & (~JOB_COMPLETING), + priority, job_ptr->num_procs, + job_ptr->total_procs, nodes, + job_ptr->job_state & (~JOB_COMPLETING), + job_ptr->partition, job_ptr->account, + job_ptr->assoc_id); + + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); try_again: if(!(job_ptr->db_index = mysql_insert_ret_id( mysql_conn->db_conn, query))) { @@ -6845,15 +7079,19 @@ extern int 
jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, } } else { query = xstrdup_printf( - "update %s set partition='%s', blockid='%s', start=%d, " - "name='%s', state=%u, alloc_cpus=%u, nodelist='%s', " - "account='%s', end=0 where id=%d", - job_table, job_ptr->partition, block_id, - (int)job_ptr->start_time, - jname, - job_ptr->job_state & (~JOB_COMPLETING), - job_ptr->total_procs, nodes, - job_ptr->account, job_ptr->db_index); + "update %s set partition='%s', ", + job_table, job_ptr->partition); + if(block_id) + xstrfmtcat(query, "blockid='%s', ", block_id); + xstrfmtcat(query, "start=%d, name='%s', state=%u, " + "alloc_cpus=%u, nodelist='%s', " + "account='%s', associd=%u, end=0 where id=%d", + (int)job_ptr->start_time, + jname, job_ptr->job_state & (~JOB_COMPLETING), + job_ptr->total_procs, nodes, + job_ptr->account, job_ptr->assoc_id, + job_ptr->db_index); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); rc = mysql_db_query(mysql_conn->db_conn, query); } @@ -6899,11 +7137,11 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, nodes = "(null)"; if(!job_ptr->db_index) { - job_ptr->db_index = _get_db_index(mysql_conn->db_conn, - job_ptr->details->submit_time, - job_ptr->job_id, - job_ptr->assoc_id); - if(job_ptr->db_index == (uint32_t)-1) { + if(!(job_ptr->db_index = + _get_db_index(mysql_conn->db_conn, + job_ptr->details->submit_time, + job_ptr->job_id, + job_ptr->assoc_id))) { /* If we get an error with this just fall * through to avoid an infinite loop */ @@ -6925,10 +7163,11 @@ extern int jobacct_storage_p_job_complete(mysql_conn_t *mysql_conn, job_ptr->job_state & (~JOB_COMPLETING), nodes, job_ptr->exit_code, job_ptr->requid, job_ptr->db_index); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); - return rc; + return rc; #else return SLURM_ERROR; #endif @@ -6994,12 +7233,11 @@ extern int jobacct_storage_p_step_start(mysql_conn_t *mysql_conn, * hasn't been set yet */ if(!step_ptr->job_ptr->db_index) { - step_ptr->job_ptr->db_index = - _get_db_index(mysql_conn->db_conn, - step_ptr->job_ptr->details->submit_time, - step_ptr->job_ptr->job_id, - step_ptr->job_ptr->assoc_id); - if(step_ptr->job_ptr->db_index == (uint32_t)-1) { + if(!(step_ptr->job_ptr->db_index = + _get_db_index(mysql_conn->db_conn, + step_ptr->job_ptr->details->submit_time, + step_ptr->job_ptr->job_id, + step_ptr->job_ptr->assoc_id))) { /* If we get an error with this just fall * through to avoid an infinite loop */ @@ -7111,12 +7349,11 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, } if(!step_ptr->job_ptr->db_index) { - step_ptr->job_ptr->db_index = - _get_db_index(mysql_conn->db_conn, - step_ptr->job_ptr->details->submit_time, - step_ptr->job_ptr->job_id, - step_ptr->job_ptr->assoc_id); - if(step_ptr->job_ptr->db_index == (uint32_t)-1) { + if(!(step_ptr->job_ptr->db_index = + _get_db_index(mysql_conn->db_conn, + step_ptr->job_ptr->details->submit_time, + step_ptr->job_ptr->job_id, + step_ptr->job_ptr->assoc_id))) { /* If we get an error with this just fall * through to avoid an infinite loop */ @@ -7174,6 +7411,7 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, jobacct->min_cpu_id.nodeid, /* min cpu node */ ave_cpu, /* ave cpu */ step_ptr->job_ptr->db_index, step_ptr->step_id); + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); rc = mysql_db_query(mysql_conn->db_conn, query); xfree(query); @@ -7197,11 +7435,11 @@ extern int 
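
A small but recurring cleanup in these job-accounting functions: _get_db_index() now returns 0 instead of -1 when no row matches. Since db_index is a uint32_t, the old callers had to compare against (uint32_t)-1; with 0 as the sentinel (an auto-increment row id is never 0), lookup and test collapse into one expression. The caller-side shape, repeated in job_complete, step_start, step_complete, and suspend:

        if (!job_ptr->db_index) {
                if (!(job_ptr->db_index =
                      _get_db_index(mysql_conn->db_conn,
                                    job_ptr->details->submit_time,
                                    job_ptr->job_id,
                                    job_ptr->assoc_id))) {
                        /* fall through and re-insert the record rather than
                         * loop forever on a bad index */
                }
        }
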
jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, if(_check_connection(mysql_conn) != SLURM_SUCCESS) return SLURM_ERROR; if(!job_ptr->db_index) { - job_ptr->db_index = _get_db_index(mysql_conn->db_conn, - job_ptr->details->submit_time, - job_ptr->job_id, - job_ptr->assoc_id); - if(job_ptr->db_index == (uint32_t)-1) { + if(!(job_ptr->db_index = + _get_db_index(mysql_conn->db_conn, + job_ptr->details->submit_time, + job_ptr->job_id, + job_ptr->assoc_id))) { /* If we get an error with this just fall * through to avoid an infinite loop */ @@ -7260,7 +7498,7 @@ extern int jobacct_storage_p_suspend(mysql_conn_t *mysql_conn, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn, +extern List jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, List selected_steps, List selected_parts, sacct_parameters_t *params) @@ -7290,7 +7528,7 @@ extern List jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn, list_append(job_cond.groupid_list, temp); } - job_list = mysql_jobacct_process_get_jobs(mysql_conn, &job_cond); + job_list = mysql_jobacct_process_get_jobs(mysql_conn, uid, &job_cond); if(job_cond.userid_list) list_destroy(job_cond.userid_list); @@ -7307,13 +7545,14 @@ extern List jobacct_storage_p_get_jobs(mysql_conn_t *mysql_conn, * note List needs to be freed when called */ extern List jobacct_storage_p_get_jobs_cond(mysql_conn_t *mysql_conn, + uid_t uid, acct_job_cond_t *job_cond) { List job_list = NULL; #ifdef HAVE_MYSQL if(_check_connection(mysql_conn) != SLURM_SUCCESS) return NULL; - job_list = mysql_jobacct_process_get_jobs(mysql_conn, job_cond); + job_list = mysql_jobacct_process_get_jobs(mysql_conn, uid, job_cond); #endif return job_list; } diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c index 0b0c44edd..318daf6dc 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c @@ -46,7 +46,7 @@ #ifdef HAVE_MYSQL -extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, +extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, acct_job_cond_t *job_cond) { @@ -56,7 +56,7 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, char *object = NULL; jobacct_selected_step_t *selected_step = NULL; ListIterator itr = NULL; - int set = 0; + int set = 0, is_admin=1; char *table_level="t2"; MYSQL_RES *result = NULL, *step_result = NULL; MYSQL_ROW row, step_row; @@ -65,6 +65,8 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, jobacct_step_rec_t *step = NULL; time_t now = time(NULL); List job_list = list_create(destroy_jobacct_job_rec); + uint16_t private_data = 0; + acct_user_rec_t user; /* if this changes you will need to edit the corresponding * enum below also t1 is job_table */ @@ -195,6 +197,33 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, STEP_REQ_COUNT }; + memset(&user, 0, sizeof(acct_user_rec_t)); + user.uid = uid; + + private_data = slurm_get_private_data(); + if (private_data & PRIVATE_DATA_JOBS) { + /* This only works when running though the slurmdbd. + * THERE IS NO AUTHENTICATION WHEN RUNNNING OUT OF THE + * SLURMDBD! + */ + if(slurmdbd_conf) { + is_admin = 0; + /* we have to check the authentication here in the + * plugin since we don't know what accounts are being + * referenced until after the query. 
Here we will + * set if they are an operator or greater and then + * check it below after the query. + */ + if((uid == slurmdbd_conf->slurm_user_id || uid == 0) + || assoc_mgr_get_admin_level(mysql_conn, uid) + >= ACCT_ADMIN_OPERATOR) + is_admin = 1; + else { + assoc_mgr_fill_in_user(mysql_conn, &user, 1); + } + } + } + if(!job_cond) goto no_cond; @@ -368,6 +397,57 @@ no_cond: for(i=1; i<JOB_REQ_COUNT; i++) { xstrfmtcat(tmp, ", %s", job_req_inx[i]); } + + /* This is here to make sure we are looking at only this user + * if this flag is set. We also include any accounts they may be + * coordinator of. + */ + if(!is_admin && (private_data & PRIVATE_DATA_JOBS)) { + query = xstrdup_printf("select lft from %s where user='%s'", + assoc_table, user.name); + if(user.coord_accts) { + acct_coord_rec_t *coord = NULL; + itr = list_iterator_create(user.coord_accts); + while((coord = list_next(itr))) { + xstrfmtcat(query, " || acct='%s'", + coord->name); + } + list_iterator_destroy(itr); + } + debug3("%d(%d) query\n%s", mysql_conn->conn, __LINE__, query); + if(!(result = mysql_db_query_ret( + mysql_conn->db_conn, query, 0))) { + xfree(extra); + xfree(query); + return NULL; + } + xfree(query); + set = 0; + while((row = mysql_fetch_row(result))) { + if(set) { + xstrfmtcat(extra, + " || (%s between %s.lft and %s.rgt)", + row[0], table_level, table_level); + } else { + set = 1; + if(extra) + xstrfmtcat(extra, + " && ((%s between %s.lft " + "and %s.rgt)", + row[0], table_level, + table_level); + else + xstrfmtcat(extra, + " where ((%s between %s.lft " + "and %s.rgt)", + row[0], table_level, + table_level); + } + } + if(set) + xstrcat(extra,")"); + mysql_free_result(result); + } query = xstrdup_printf("select %s from %s as t1 left join %s as t2 " "on t1.associd=t2.id", diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h index 983d6db2f..8085f0ace 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.h @@ -62,7 +62,7 @@ extern char *job_table; extern char *step_table; extern char *suspend_table; -extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, +extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, acct_job_cond_t *job_cond); extern void mysql_jobacct_process_archive(mysql_conn_t *mysql_conn, diff --git a/src/plugins/accounting_storage/none/accounting_storage_none.c b/src/plugins/accounting_storage/none/accounting_storage_none.c index 2cd7c7df9..373cd2203 100644 --- a/src/plugins/accounting_storage/none/accounting_storage_none.c +++ b/src/plugins/accounting_storage/none/accounting_storage_none.c @@ -203,43 +203,43 @@ extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, return NULL; } -extern List acct_storage_p_get_users(void *db_conn, +extern List acct_storage_p_get_users(void *db_conn, uid_t uid, acct_user_cond_t *user_q) { return NULL; } -extern List acct_storage_p_get_accts(void *db_conn, +extern List acct_storage_p_get_accts(void *db_conn, uid_t uid, acct_account_cond_t *acct_q) { return NULL; } -extern List acct_storage_p_get_clusters(void *db_conn, +extern List acct_storage_p_get_clusters(void *db_conn, uid_t uid, acct_account_cond_t *cluster_q) { return NULL; } -extern List acct_storage_p_get_associations(void *db_conn, +extern List acct_storage_p_get_associations(void *db_conn, uid_t uid, acct_association_cond_t *assoc_q) { return NULL; } -extern List 
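
The job privacy filter above relies on the lft/rgt columns of assoc_table, which look like a nested-set encoding of the association tree: one numeric "between" test per fetched lft value replaces any recursive tree walk. With illustrative lft values 42 and 77 standing in for the rows fetched above, the generated query ends up shaped like:

        /*
         *   select ... from job_table as t1
         *          left join assoc_table as t2 on t1.associd=t2.id
         *   where ((42 between t2.lft and t2.rgt)
         *       || (77 between t2.lft and t2.rgt))
         */
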
acct_storage_p_get_qos(void *db_conn, +extern List acct_storage_p_get_qos(void *db_conn, uid_t uid, acct_qos_cond_t *qos_cond) { return NULL; } -extern List acct_storage_p_get_txn(void *db_conn, +extern List acct_storage_p_get_txn(void *db_conn, uid_t uid, acct_txn_cond_t *txn_cond) { return NULL; } -extern int acct_storage_p_get_usage(void *db_conn, +extern int acct_storage_p_get_usage(void *db_conn, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end) { @@ -286,7 +286,7 @@ extern int clusteracct_storage_p_cluster_procs(void *db_conn, } extern int clusteracct_storage_p_get_usage( - void *db_conn, + void *db_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end) { @@ -343,7 +343,7 @@ extern int jobacct_storage_p_suspend(void *db_conn, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs(void *db_conn, +extern List jobacct_storage_p_get_jobs(void *db_conn, uid_t uid, List selected_steps, List selected_parts, void *params) @@ -356,7 +356,8 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, * returns List of jobacct_job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs_cond(void *db_conn, void *job_cond) +extern List jobacct_storage_p_get_jobs_cond(void *db_conn, uid_t uid, + void *job_cond) { return NULL; } diff --git a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c index 41d5987ec..26c6e0535 100644 --- a/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c +++ b/src/plugins/accounting_storage/pgsql/accounting_storage_pgsql.c @@ -890,43 +890,43 @@ extern List acct_storage_p_remove_qos(void *db_conn, uint32_t uid, return NULL; } -extern List acct_storage_p_get_users(PGconn *acct_pgsql_db, +extern List acct_storage_p_get_users(PGconn *acct_pgsql_db, uid_t uid, acct_user_cond_t *user_cond) { return NULL; } -extern List acct_storage_p_get_accts(PGconn *acct_pgsql_db, +extern List acct_storage_p_get_accts(PGconn *acct_pgsql_db, uid_t uid, acct_account_cond_t *acct_cond) { return NULL; } -extern List acct_storage_p_get_clusters(PGconn *acct_pgsql_db, +extern List acct_storage_p_get_clusters(PGconn *acct_pgsql_db, uid_t uid, acct_account_cond_t *cluster_cond) { return NULL; } -extern List acct_storage_p_get_associations(PGconn *acct_pgsql_db, +extern List acct_storage_p_get_associations(PGconn *acct_pgsql_db, uid_t uid, acct_association_cond_t *assoc_cond) { return NULL; } -extern List acct_storage_p_get_qos(void *db_conn, +extern List acct_storage_p_get_qos(void *db_conn, uid_t uid, acct_qos_cond_t *qos_cond) { return NULL; } -extern List acct_storage_p_get_txn(PGconn *acct_pgsql_db, +extern List acct_storage_p_get_txn(PGconn *acct_pgsql_db, uid_t uid, acct_txn_cond_t *txn_cond) { return NULL; } -extern int acct_storage_p_get_usage(PGconn *acct_pgsql_db, +extern int acct_storage_p_get_usage(PGconn *acct_pgsql_db, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end) { @@ -1087,7 +1087,7 @@ end_it: } extern int clusteracct_storage_p_get_usage( - void *db_conn, + void *db_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end) { @@ -1562,7 +1562,7 @@ extern int jobacct_storage_p_suspend(PGconn *acct_pgsql_db, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, +extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, uid_t uid, 
List selected_steps, List selected_parts, sacct_parameters_t *params) @@ -1612,7 +1612,7 @@ extern List jobacct_storage_p_get_jobs(PGconn *acct_pgsql_db, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs_cond(PGconn *acct_pgsql_db, +extern List jobacct_storage_p_get_jobs_cond(PGconn *acct_pgsql_db, uid_t uid, acct_job_cond_t *job_cond) { List job_list = NULL; diff --git a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c index f0c8556f2..b003a7a37 100644 --- a/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c +++ b/src/plugins/accounting_storage/slurmdbd/accounting_storage_slurmdbd.c @@ -688,7 +688,7 @@ extern List acct_storage_p_remove_qos( return ret_list; } -extern List acct_storage_p_get_users(void *db_conn, +extern List acct_storage_p_get_users(void *db_conn, uid_t uid, acct_user_cond_t *user_cond) { slurmdbd_msg_t req, resp; @@ -718,7 +718,7 @@ extern List acct_storage_p_get_users(void *db_conn, return ret_list; } -extern List acct_storage_p_get_accts(void *db_conn, +extern List acct_storage_p_get_accts(void *db_conn, uid_t uid, acct_account_cond_t *acct_cond) { slurmdbd_msg_t req, resp; @@ -749,7 +749,7 @@ extern List acct_storage_p_get_accts(void *db_conn, return ret_list; } -extern List acct_storage_p_get_clusters(void *db_conn, +extern List acct_storage_p_get_clusters(void *db_conn, uid_t uid, acct_account_cond_t *cluster_cond) { slurmdbd_msg_t req, resp; @@ -780,7 +780,7 @@ extern List acct_storage_p_get_clusters(void *db_conn, return ret_list; } -extern List acct_storage_p_get_associations(void *db_conn, +extern List acct_storage_p_get_associations(void *db_conn, uid_t uid, acct_association_cond_t *assoc_cond) { slurmdbd_msg_t req, resp; @@ -810,7 +810,7 @@ extern List acct_storage_p_get_associations(void *db_conn, return ret_list; } -extern List acct_storage_p_get_qos(void *db_conn, +extern List acct_storage_p_get_qos(void *db_conn, uid_t uid, acct_qos_cond_t *qos_cond) { slurmdbd_msg_t req, resp; @@ -847,7 +847,7 @@ extern List acct_storage_p_get_qos(void *db_conn, return ret_list; } -extern List acct_storage_p_get_txn(void *db_conn, +extern List acct_storage_p_get_txn(void *db_conn, uid_t uid, acct_txn_cond_t *txn_cond) { slurmdbd_msg_t req, resp; @@ -877,7 +877,7 @@ extern List acct_storage_p_get_txn(void *db_conn, return ret_list; } -extern int acct_storage_p_get_usage(void *db_conn, +extern int acct_storage_p_get_usage(void *db_conn, uid_t uid, acct_association_rec_t *acct_assoc, time_t start, time_t end) { @@ -1029,7 +1029,7 @@ extern int clusteracct_storage_p_register_ctld(char *cluster, } extern int clusteracct_storage_p_get_usage( - void *db_conn, + void *db_conn, uid_t uid, acct_cluster_rec_t *cluster_rec, time_t start, time_t end) { @@ -1346,7 +1346,7 @@ extern int jobacct_storage_p_suspend(void *db_conn, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs(void *db_conn, +extern List jobacct_storage_p_get_jobs(void *db_conn, uid_t uid, List selected_steps, List selected_parts, sacct_parameters_t *params) @@ -1397,7 +1397,7 @@ extern List jobacct_storage_p_get_jobs(void *db_conn, * returns List of job_rec_t * * note List needs to be freed when called */ -extern List jobacct_storage_p_get_jobs_cond(void *db_conn, +extern List jobacct_storage_p_get_jobs_cond(void *db_conn, uid_t uid, acct_job_cond_t *job_cond) { slurmdbd_msg_t req, 
resp; diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 207c906a7..16373f2e6 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -74,6 +74,7 @@ #include "src/slurmctld/node_scheduler.h" #include "src/slurmctld/slurmctld.h" #include "src/slurmctld/srun_comm.h" +#include "backfill.h" typedef struct node_space_map { time_t begin_time; @@ -169,14 +170,21 @@ extern void *backfill_agent(void *args) struct timeval tv1, tv2; char tv_str[20]; time_t now; + int i, iter; static time_t last_backfill_time = 0; /* Read config, and partitions; Write jobs and nodes */ slurmctld_lock_t all_locks = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK }; while (!stop_backfill) { - sleep(2); /* don't run continuously */ - + iter = (BACKFILL_CHECK_SEC * 1000000) / + STOP_CHECK_USEC; + for (i=0; ((i<iter) && (!stop_backfill)); i++) { + /* test stop_backfill every 0.1 sec for + * 2.0 secs to avoid running continuously */ + usleep(STOP_CHECK_USEC); + } + now = time(NULL); /* Avoid resource fragmentation if important */ if (switch_no_frag() && job_is_completing()) diff --git a/src/plugins/sched/backfill/backfill.h b/src/plugins/sched/backfill/backfill.h index 76ff6d136..2e894eaa6 100644 --- a/src/plugins/sched/backfill/backfill.h +++ b/src/plugins/sched/backfill/backfill.h @@ -35,6 +35,9 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ +#define STOP_CHECK_USEC 100000 /* check for shutdown every 0.1 secs */ +#define BACKFILL_CHECK_SEC 2 /* try to backfill every 2.0 seconds */ + /* backfill_agent - detached thread periodically attempts to backfill jobs */ extern void *backfill_agent(void *args); diff --git a/src/plugins/sched/backfill/backfill_wrapper.c b/src/plugins/sched/backfill/backfill_wrapper.c index ce23c5808..cc73e722b 100644 --- a/src/plugins/sched/backfill/backfill_wrapper.c +++ b/src/plugins/sched/backfill/backfill_wrapper.c @@ -96,25 +96,12 @@ int init( void ) /**************************************************************************/ /* TAG( fini ) */ /**************************************************************************/ -static void _cancel_thread (pthread_t thread_id) -{ - int i; - - for (i=0; i<4; i++) { - if (pthread_cancel(thread_id)) - return; - usleep(1000); - } - error("Could not kill backfill sched pthread"); -} - void fini( void ) { pthread_mutex_lock( &thread_flag_mutex ); if ( backfill_thread ) { verbose( "Backfill scheduler plugin shutting down" ); stop_backfill_agent(); - _cancel_thread( backfill_thread ); backfill_thread = 0; } pthread_mutex_unlock( &thread_flag_mutex ); diff --git a/src/plugins/sched/wiki/job_modify.c b/src/plugins/sched/wiki/job_modify.c index 4ac123c9c..7cad50ead 100644 --- a/src/plugins/sched/wiki/job_modify.c +++ b/src/plugins/sched/wiki/job_modify.c @@ -95,11 +95,10 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, old_time) * 60); last_job_update = time(NULL); } - if (bank_ptr) { - info("wiki: change job %u bank %s", jobid, bank_ptr); - xfree(job_ptr->account); - job_ptr->account = xstrdup(bank_ptr); - last_job_update = time(NULL); + + if (bank_ptr && + (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) { + return EINVAL; } if (new_hostlist) { diff --git a/src/plugins/sched/wiki2/job_modify.c b/src/plugins/sched/wiki2/job_modify.c index 9e5f7aec1..714a38976 100644 --- a/src/plugins/sched/wiki2/job_modify.c +++ b/src/plugins/sched/wiki2/job_modify.c @@ 
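
The backfill change above replaces one uninterruptible sleep(2) with twenty 0.1-second naps (2 * 1000000 / 100000 = 20 iterations), checking stop_backfill between naps. That is what makes the pthread_cancel()-based _cancel_thread() in backfill_wrapper.c unnecessary: fini() now just sets the flag and the agent exits on its own within about 0.1 s. The loop's shape:

        /* Poll for shutdown every STOP_CHECK_USEC instead of sleeping
         * BACKFILL_CHECK_SEC in one block. */
        iter = (BACKFILL_CHECK_SEC * 1000000) / STOP_CHECK_USEC;
        for (i = 0; (i < iter) && !stop_backfill; i++)
                usleep(STOP_CHECK_USEC);
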
-99,11 +99,9 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, last_job_update = now; } - if (bank_ptr) { - info("wiki: change job %u bank %s", jobid, bank_ptr); - xfree(job_ptr->account); - job_ptr->account = xstrdup(bank_ptr); - last_job_update = now; + if (bank_ptr && + (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) { + return EINVAL; } if (feature_ptr) { diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index b8780f96f..4a6e99f7b 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * block_allocator.c - Assorted functions for layout of bglblocks, * wiring, mapping for smap, etc. - * $Id: block_allocator.c 14348 2008-06-25 19:05:31Z da $ + * $Id: block_allocator.c 14952 2008-09-03 16:08:14Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -101,19 +101,26 @@ s_p_options_t bg_conf_file_options[] = { #ifdef HAVE_BG_FILES /** */ static void _bp_map_list_del(void *object); + +/** */ static int _port_enum(int port); + #endif /* */ static int _check_for_options(ba_request_t* ba_request); + /* */ static int _append_geo(int *geo, List geos, int rotate); + /* */ static int _fill_in_coords(List results, List start_list, int *geometry, int conn_type); + /* */ static int _copy_the_path(List nodes, ba_switch_t *curr_switch, ba_switch_t *mark_switch, int source, int dim); + /* */ static int _find_yz_path(ba_node_t *ba_node, int *first, int *geometry, int conn_type); @@ -145,26 +152,33 @@ static void _delete_path_list(void *object); /* find the first block match in the system */ static int _find_match(ba_request_t* ba_request, List results); +/** */ static bool _node_used(ba_node_t* ba_node, int *geometry); /* */ static void _switch_config(ba_node_t* source, ba_node_t* target, int dim, int port_src, int port_tar); + /* */ static int _set_external_wires(int dim, int count, ba_node_t* source, ba_node_t* target); + /* */ static char *_set_internal_wires(List nodes, int size, int conn_type); + /* */ static int _find_x_path(List results, ba_node_t *ba_node, int *start, int *first, int *geometry, int found, int conn_type); + /* */ static int _find_x_path2(List results, ba_node_t *ba_node, int *start, int *first, int *geometry, int found, int conn_type); + /* */ static int _remove_node(List results, int *node_tar); + /* */ static int _find_next_free_using_port_2(ba_switch_t *curr_switch, int source_port, @@ -372,12 +386,39 @@ extern void destroy_ba_node(void *ptr) } } -/** +/* * create a block request. Note that if the geometry is given, - * then size is ignored. + * then size is ignored. If elongate is true, the algorithm will try + * to fit that a block of cubic shape and then it will try other + * elongated geometries. (ie, 2x2x2 -> 4x2x1 -> 8x1x1). * * IN/OUT - ba_request: structure to allocate and fill in. - * return SUCCESS of operation. + * + * ALL below IN's need to be set within the ba_request before the call + * if you want them to be used. + * ALL below OUT's are set and returned within the ba_request. + * IN - avail_node_bitmap: bitmap of usable midplanes. 
+ * IN - blrtsimage: BlrtsImage for this block if not default + * IN - conn_type: connection type of request (TORUS or MESH or SMALL) + * IN - elongate: if true, will try to fit different geometries of + * same size requests + * IN/OUT - geometry: requested/returned geometry of block + * IN - linuximage: LinuxImage for this block if not default + * IN - mloaderimage: MLoaderImage for this block if not default + * IN - nodecards: Number of nodecards in each block in request only + * used of small block allocations. + * OUT - passthroughs: if there were passthroughs used in the + * generation of the block. + * IN - procs: Number of real processors requested + * IN - quarters: Number of midplane quarters in each block in request only + * used of small block allocations. + * IN - RamDiskimage: RamDiskImage for this block if not default + * IN - rotate: if true, allows rotation of block during fit + * OUT - save_name: hostlist of midplanes used in block + * IN/OUT - size: requested/returned count of midplanes in block + * IN - start: geo location of where to start the allocation + * IN - start_req: if set use the start variable to start at + * return success of allocation/validation of params */ extern int new_ba_request(ba_request_t* ba_request) { @@ -967,6 +1008,11 @@ node_info_error: init_grid(node_info_ptr); } +/* If emulating a system set up a known configuration for wires in a + * system of the size given. + * If a real bluegene system, query the system and get all wiring + * information of the system. + */ extern void init_wires() { int x, y, z, i; @@ -1037,10 +1083,12 @@ extern void ba_fini() } -/** - * set the node in the internal configuration as unusable +/* + * set the node in the internal configuration as in, or not in use, + * along with the current state of the node. * - * IN ba_node: ba_node_t to put down + * IN ba_node: ba_node_t to update state + * IN state: new state of ba_node_t */ extern void ba_update_node_state(ba_node_t *ba_node, uint16_t state) { @@ -1070,11 +1118,12 @@ extern void ba_update_node_state(ba_node_t *ba_node, uint16_t state) ba_node->used = false; ba_node->state = state; } -/** - * copy info from a ba_node + +/* + * copy info from a ba_node, a direct memcpy of the ba_node_t * * IN ba_node: node to be copied - * OUT ba_node_t *: copied info must be freed with destroy_ba_node + * Returned ba_node_t *: copied info must be freed with destroy_ba_node */ extern ba_node_t *ba_copy_node(ba_node_t *ba_node) { @@ -1083,7 +1132,74 @@ extern ba_node_t *ba_copy_node(ba_node_t *ba_node) memcpy(new_ba_node, ba_node, sizeof(ba_node_t)); return new_ba_node; } -/** + +/* + * copy the path of the nodes given + * + * IN nodes List of ba_node_t *'s: nodes to be copied + * OUT dest_nodes List of ba_node_t *'s: filled in list of nodes + * wiring. 
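
Since the new_ba_request() parameter list above is long, a hedged usage sketch may help. Field names follow the ba_request_t declaration later in this patch; TORUS is a placeholder for whatever connection-type constant the build actually defines, and the nonzero-on-success return is an assumption. Per the comment above, a filled-in geometry makes size ignored:

        ba_request_t request;

        memset(&request, 0, sizeof(request));
        request.geometry[X] = 2;        /* ask for a 2x2x2 block */
        request.geometry[Y] = 2;
        request.geometry[Z] = 2;
        request.conn_type = TORUS;      /* placeholder constant */
        request.rotate = true;          /* allow 4x2x1, 8x1x1, ... */
        request.elongate = true;

        if (new_ba_request(&request)) { /* assuming nonzero means valid */
                List results = list_create(NULL);
                if (!allocate_block(&request, results))
                        error("allocation failed");
        }
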
+ * Return on success SLURM_SUCCESS, on error SLURM_ERROR + */ +extern int copy_node_path(List nodes, List *dest_nodes) +{ + int rc = SLURM_ERROR; + +#ifdef HAVE_BG + ListIterator itr = NULL; + ListIterator itr2 = NULL; + ba_node_t *ba_node = NULL, *new_ba_node = NULL; + int dim; + ba_switch_t *curr_switch = NULL, *new_switch = NULL; + + if(!nodes) + return SLURM_ERROR; + if(!*dest_nodes) + *dest_nodes = list_create(destroy_ba_node); + + itr = list_iterator_create(nodes); + while((ba_node = list_next(itr))) { + itr2 = list_iterator_create(*dest_nodes); + while((new_ba_node = list_next(itr2))) { + if (ba_node->coord[X] == new_ba_node->coord[X] && + ba_node->coord[Y] == new_ba_node->coord[Y] && + ba_node->coord[Z] == new_ba_node->coord[Z]) + break; /* we found it */ + } + list_iterator_destroy(itr2); + + if(!new_ba_node) { + debug2("adding %c%c%c as a new node", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); + new_ba_node = ba_copy_node(ba_node); + _new_ba_node(new_ba_node, ba_node->coord, false); + list_push(*dest_nodes, new_ba_node); + + } + new_ba_node->used = true; + for(dim=0;dim<BA_SYSTEM_DIMENSIONS;dim++) { + curr_switch = &ba_node->axis_switch[dim]; + new_switch = &new_ba_node->axis_switch[dim]; + if(curr_switch->int_wire[0].used) { + if(!_copy_the_path(*dest_nodes, + curr_switch, new_switch, + 0, dim)) { + rc = SLURM_ERROR; + break; + } + } + } + + } + list_iterator_destroy(itr); + rc = SLURM_SUCCESS; +#endif + return rc; +} + +/* * Try to allocate a block. * * IN - ba_request: allocation request @@ -1114,10 +1230,9 @@ extern int allocate_block(ba_request_t* ba_request, List results) } -/** - * Doh! Admin made a boo boo. - * - * returns SLURM_SUCCESS if undo was successful. +/* + * Admin wants to remove a previous allocation. + * will allow Admin to delete a previous allocation retrival by letter code. */ extern int remove_block(List nodes, int new_count) { @@ -1148,11 +1263,11 @@ extern int remove_block(List nodes, int new_count) return 1; } -/** - * Doh! Admin made a boo boo. Note: Undo only has one history - * element, so two consecutive undo's will fail. - * - * returns SLURM_SUCCESS if undo was successful. +/* + * Admin wants to change something about a previous allocation. 
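
The substantive fix in the relocated copy_node_path() above is the signature: dest_nodes changed from List to List *. With the old version, a caller passing NULL never received the list that list_create() built, because the assignment only updated the function's local copy of the handle. From the caller's side:

        List dest = NULL;

        /* old: dest is still NULL afterwards; the copied nodes are lost */
        copy_node_path(nodes, dest);

        /* new: the callee allocates through the pointer */
        copy_node_path(nodes, &dest);
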
+ * will allow Admin to change previous allocation by giving the + * letter code for the allocation and the variable to alter + * (Not currently used in the system, update this if it is) */ extern int alter_block(List nodes, int conn_type) { @@ -1185,10 +1300,10 @@ /* } */ } -/** +/* * After a block is deleted or altered following allocations must * be redone to make sure correct path will be used in the real system - * + * (Not currently used in the system, update this if it is) */ extern int redo_block(List nodes, int *geo, int conn_type, int new_count) { @@ -1211,64 +1326,19 @@ } } -extern int copy_node_path(List nodes, List dest_nodes) -{ - int rc = SLURM_ERROR; - -#ifdef HAVE_BG - ListIterator itr = NULL; - ListIterator itr2 = NULL; - ba_node_t *ba_node = NULL, *new_ba_node = NULL; - int dim; - ba_switch_t *curr_switch = NULL, *new_switch = NULL; - - if(!nodes) - return SLURM_ERROR; - if(!dest_nodes) - dest_nodes = list_create(destroy_ba_node); - - itr = list_iterator_create(nodes); - while((ba_node = list_next(itr))) { - itr2 = list_iterator_create(dest_nodes); - while((new_ba_node = list_next(itr2))) { - if (ba_node->coord[X] == new_ba_node->coord[X] && - ba_node->coord[Y] == new_ba_node->coord[Y] && - ba_node->coord[Z] == new_ba_node->coord[Z]) - break; /* we found it */ - } - list_iterator_destroy(itr2); - - if(!new_ba_node) { - debug2("adding %c%c%c as a new node", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); - new_ba_node = ba_copy_node(ba_node); - _new_ba_node(new_ba_node, ba_node->coord, false); - list_push(dest_nodes, new_ba_node); - - } - new_ba_node->used = true; - for(dim=0;dim<BA_SYSTEM_DIMENSIONS;dim++) { - curr_switch = &ba_node->axis_switch[dim]; - new_switch = &new_ba_node->axis_switch[dim]; - if(curr_switch->int_wire[0].used) { - if(!_copy_the_path(dest_nodes, - curr_switch, new_switch, - 0, dim)) { - rc = SLURM_ERROR; - break; - } - } - } - - } - list_iterator_destroy(itr); - rc = SLURM_SUCCESS; -#endif - return rc; -} - +/* + * Used to set a block into a virtual system. The system can be + * cleared first and this function sets all the wires and midplanes + * used in the nodelist given. The nodelist is a list of ba_node_t's + * that are already set up. This is very handy to test if there are + * any passthroughs used by one block when adding another block that + * also uses those wires, and neither use any overlapping + * midplanes. Doing a simple bitmap & will not reveal this. + * + * Returns SLURM_SUCCESS if nodelist fits into system without + * conflict, and SLURM_ERROR if nodelist conflicts with something + * already in the system. + */ extern int check_and_set_node_list(List nodes) { int rc = SLURM_ERROR; @@ -1352,6 +1422,19 @@ end_it: return rc; } +/* + * Used to find and set up midplanes and the wires in the virtual + * system and return them in List results + * + * IN/OUT results - a list with a NULL destroyer filled in with + * midplanes and wires set to create the block with the api. If + * only interested in the hostlist, NULL can be passed as well. + * IN start - where to start the allocation. + * IN geometry - the requested geometry of the block. + * IN conn_type - mesh, torus, or small. + * RET char * - hostlist of midplanes the results represent; must be + * xfreed.
NULL on failure. + */ extern char *set_bg_block(List results, int *start, int *geometry, int conn_type) { @@ -1457,6 +1540,7 @@ extern char *set_bg_block(List results, end_it: if(!send_results && results) { list_destroy(results); + results = NULL; } if(name!=NULL) { debug2("name = %s", name); @@ -1468,6 +1552,10 @@ end_it: return name; } +/* + * Resets the virtual system to a virgin state. If track_down_nodes is set + * then those midplanes are not set to idle, but kept in a down state. + */ extern int reset_ba_system(bool track_down_nodes) { int x; @@ -1496,8 +1584,15 @@ extern int reset_ba_system(bool track_down_nodes) return 1; } -/* need to call rest_all_removed_bps before starting another - * allocation attempt +/* + * Used to set all midplanes in a special used state except the ones + * we are able to use in a new allocation. + * + * IN: hostlist of midplanes we do not want + * RET: SLURM_SUCCESS on success, or SLURM_ERROR on error + * + * Note: Need to call reset_all_removed_bps before starting another + * allocation attempt afterward */ extern int removable_set_bps(char *bps) { @@ -1568,6 +1663,10 @@ return SLURM_SUCCESS; } +/* + * Resets the virtual system to the state it was in before + * removable_set_bps or set_all_bps_except was called. + */ extern int reset_all_removed_bps() { int x; @@ -1588,7 +1687,11 @@ return SLURM_SUCCESS; } -/* need to call rest_all_removed_bps before starting another +/* + * IN: hostlist of midplanes we do not want + * RET: SLURM_SUCCESS on success, or SLURM_ERROR on error + * + * Need to call reset_all_removed_bps before starting another * allocation attempt if possible use removable_set_bps since it is * faster. It does basically the opposite of this function.
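
Read together, the comments above describe a mark/allocate/restore protocol for the virtual system. A hypothetical caller (the input names are mine):

        /* unusable_bps: hostlist of midplanes to exclude, per the IN note */
        if (removable_set_bps(unusable_bps) == SLURM_SUCCESS) {
                name = set_bg_block(results, start, geometry, conn_type);
                /* mandatory before the next allocation attempt */
                reset_all_removed_bps();
        }
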
If you * have to come up with this list though it is faster to use this @@ -1676,7 +1779,9 @@ extern int set_all_bps_except(char *bps) return SLURM_SUCCESS; } -/* init_grid - set values of every grid point */ +/* + * set values of every grid point (used in smap) + */ extern void init_grid(node_info_msg_t * node_info_ptr) { node_info_t *node_ptr = NULL; @@ -1761,6 +1866,155 @@ extern void init_grid(node_info_msg_t * node_info_ptr) return; } +/* + * Convert a BG API error code to a string + * IN inx - error code from any of the BG Bridge APIs + * RET - string describing the error condition + */ +extern char *bg_err_str(status_t inx) +{ +#ifdef HAVE_BG_FILES + switch (inx) { + case STATUS_OK: + return "Status OK"; + case PARTITION_NOT_FOUND: + return "Partition not found"; + case JOB_NOT_FOUND: + return "Job not found"; + case BP_NOT_FOUND: + return "Base partition not found"; + case SWITCH_NOT_FOUND: + return "Switch not found"; + case JOB_ALREADY_DEFINED: + return "Job already defined"; + case CONNECTION_ERROR: + return "Connection error"; + case INTERNAL_ERROR: + return "Internal error"; + case INVALID_INPUT: + return "Invalid input"; + case INCOMPATIBLE_STATE: + return "Incompatible state"; + case INCONSISTENT_DATA: + return "Inconsistent data"; + } +#endif + + return "?"; +} + +/* + * Set up the map for resolving + */ +extern int set_bp_map(void) +{ +#ifdef HAVE_BG_FILES + static rm_BGL_t *bg = NULL; + int rc; + rm_BP_t *my_bp = NULL; + ba_bp_map_t *bp_map = NULL; + int bp_num, i; + char *bp_id = NULL; + rm_location_t bp_loc; + int number = 0; + + if(_bp_map_initialized) + return 1; + + bp_map_list = list_create(_bp_map_list_del); + + if (!have_db2) { + fatal("Can't access DB2 library, run from service node"); + return -1; + } + + if (!getenv("DB2INSTANCE") || !getenv("VWSPATH")) { + fatal("Missing DB2INSTANCE or VWSPATH env var." 
+ "Execute 'db2profile'"); + return -1; + } + + if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { + error("bridge_get_BGL(): %d", rc); + return -1; + } + + if ((rc = bridge_get_data(bg, RM_BPNum, &bp_num)) != STATUS_OK) { + error("bridge_get_data(RM_BPNum): %d", rc); + bp_num = 0; + } + + for (i=0; i<bp_num; i++) { + + if (i) { + if ((rc = bridge_get_data(bg, RM_NextBP, &my_bp)) + != STATUS_OK) { + error("bridge_get_data(RM_NextBP): %d", rc); + break; + } + } else { + if ((rc = bridge_get_data(bg, RM_FirstBP, &my_bp)) + != STATUS_OK) { + error("bridge_get_data(RM_FirstBP): %d", rc); + break; + } + } + + bp_map = (ba_bp_map_t *) xmalloc(sizeof(ba_bp_map_t)); + + if ((rc = bridge_get_data(my_bp, RM_BPID, &bp_id)) + != STATUS_OK) { + xfree(bp_map); + error("bridge_get_data(RM_BPID): %d", rc); + continue; + } + + if(!bp_id) { + error("No BP ID was returned from database"); + continue; + } + + if ((rc = bridge_get_data(my_bp, RM_BPLoc, &bp_loc)) + != STATUS_OK) { + xfree(bp_map); + error("bridge_get_data(RM_BPLoc): %d", rc); + continue; + } + + bp_map->bp_id = xstrdup(bp_id); + bp_map->coord[X] = bp_loc.X; + bp_map->coord[Y] = bp_loc.Y; + bp_map->coord[Z] = bp_loc.Z; + + number = xstrntol(bp_id+1, NULL, + BA_SYSTEM_DIMENSIONS, HOSTLIST_BASE); +/* no longer needed for calculation */ +/* if(DIM_SIZE[X] > bp_loc.X */ +/* && DIM_SIZE[Y] > bp_loc.Y */ +/* && DIM_SIZE[Z] > bp_loc.Z) */ +/* ba_system_ptr->grid */ +/* [bp_loc.X] */ +/* [bp_loc.Y] */ +/* [bp_loc.Z].phys_x = */ +/* number / (HOSTLIST_BASE * HOSTLIST_BASE); */ + + list_push(bp_map_list, bp_map); + + free(bp_id); + } + + if ((rc = bridge_free_bg(bg)) != STATUS_OK) + error("bridge_free_BGL(): %s", rc); + +#endif + _bp_map_initialized = true; + return 1; + +} + +/* + * find a base blocks bg location + */ extern int *find_bp_loc(char* bp_id) { #ifdef HAVE_BG_FILES @@ -1787,6 +2041,9 @@ extern int *find_bp_loc(char* bp_id) #endif } +/* + * find a rack/midplace location + */ extern char *find_bp_rack_mid(char* xyz) { #ifdef HAVE_BG_FILES @@ -1826,6 +2083,9 @@ extern char *find_bp_rack_mid(char* xyz) #endif } +/* + * set the used wires in the virtual system for a block from the real system + */ extern int load_block_wiring(char *bg_block_id) { #ifdef HAVE_BG_FILES @@ -2043,6 +2303,10 @@ extern int load_block_wiring(char *bg_block_id) } +/* + * get the used wires for a block out of the database and return the + * node list + */ extern List get_and_set_block_wiring(char *bg_block_id) { #ifdef HAVE_BG_FILES @@ -2300,6 +2564,7 @@ static void _bp_map_list_del(void *object) } } +/* translation from the enum to the actual port number */ static int _port_enum(int port) { switch(port) { @@ -2328,6 +2593,10 @@ static int _port_enum(int port) #endif +/* + * This function is here to check options for rotating and elongating + * and set up the request based on the count of each option + */ static int _check_for_options(ba_request_t* ba_request) { int temp; @@ -2394,6 +2663,9 @@ static int _check_for_options(ba_request_t* ba_request) return 0; } +/* + * grab all the geometries that we can get and append them to the list geos + */ static int _append_geo(int *geometry, List geos, int rotate) { ListIterator itr; @@ -2436,6 +2708,9 @@ static int _append_geo(int *geometry, List geos, int rotate) return 1; } +/* + * + */ static int _fill_in_coords(List results, List start_list, int *geometry, int conn_type) { @@ -2903,150 +3178,6 @@ static int _reset_the_path(ba_switch_t *curr_switch, int source, // return 1; } -/* - * Convert a BG API error code to a string - * IN inx 
- error code from any of the BG Bridge APIs - * RET - string describing the error condition - */ -extern char *bg_err_str(status_t inx) -{ -#ifdef HAVE_BG_FILES - switch (inx) { - case STATUS_OK: - return "Status OK"; - case PARTITION_NOT_FOUND: - return "Partition not found"; - case JOB_NOT_FOUND: - return "Job not found"; - case BP_NOT_FOUND: - return "Base partition not found"; - case SWITCH_NOT_FOUND: - return "Switch not found"; - case JOB_ALREADY_DEFINED: - return "Job already defined"; - case CONNECTION_ERROR: - return "Connection error"; - case INTERNAL_ERROR: - return "Internal error"; - case INVALID_INPUT: - return "Invalid input"; - case INCOMPATIBLE_STATE: - return "Incompatible state"; - case INCONSISTENT_DATA: - return "Inconsistent data"; - } -#endif - - return "?"; -} - -/** */ -extern int set_bp_map(void) -{ -#ifdef HAVE_BG_FILES - static rm_BGL_t *bg = NULL; - int rc; - rm_BP_t *my_bp = NULL; - ba_bp_map_t *bp_map = NULL; - int bp_num, i; - char *bp_id = NULL; - rm_location_t bp_loc; - int number = 0; - - if(_bp_map_initialized) - return 1; - - bp_map_list = list_create(_bp_map_list_del); - - if (!have_db2) { - fatal("Can't access DB2 library, run from service node"); - return -1; - } - - if (!getenv("DB2INSTANCE") || !getenv("VWSPATH")) { - fatal("Missing DB2INSTANCE or VWSPATH env var." - "Execute 'db2profile'"); - return -1; - } - - if ((rc = bridge_get_bg(&bg)) != STATUS_OK) { - error("bridge_get_BGL(): %d", rc); - return -1; - } - - if ((rc = bridge_get_data(bg, RM_BPNum, &bp_num)) != STATUS_OK) { - error("bridge_get_data(RM_BPNum): %d", rc); - bp_num = 0; - } - - for (i=0; i<bp_num; i++) { - - if (i) { - if ((rc = bridge_get_data(bg, RM_NextBP, &my_bp)) - != STATUS_OK) { - error("bridge_get_data(RM_NextBP): %d", rc); - break; - } - } else { - if ((rc = bridge_get_data(bg, RM_FirstBP, &my_bp)) - != STATUS_OK) { - error("bridge_get_data(RM_FirstBP): %d", rc); - break; - } - } - - bp_map = (ba_bp_map_t *) xmalloc(sizeof(ba_bp_map_t)); - - if ((rc = bridge_get_data(my_bp, RM_BPID, &bp_id)) - != STATUS_OK) { - xfree(bp_map); - error("bridge_get_data(RM_BPID): %d", rc); - continue; - } - - if(!bp_id) { - error("No BP ID was returned from database"); - continue; - } - - if ((rc = bridge_get_data(my_bp, RM_BPLoc, &bp_loc)) - != STATUS_OK) { - xfree(bp_map); - error("bridge_get_data(RM_BPLoc): %d", rc); - continue; - } - - bp_map->bp_id = xstrdup(bp_id); - bp_map->coord[X] = bp_loc.X; - bp_map->coord[Y] = bp_loc.Y; - bp_map->coord[Z] = bp_loc.Z; - - number = xstrntol(bp_id+1, NULL, - BA_SYSTEM_DIMENSIONS, HOSTLIST_BASE); -/* no longer needed for calculation */ -/* if(DIM_SIZE[X] > bp_loc.X */ -/* && DIM_SIZE[Y] > bp_loc.Y */ -/* && DIM_SIZE[Z] > bp_loc.Z) */ -/* ba_system_ptr->grid */ -/* [bp_loc.X] */ -/* [bp_loc.Y] */ -/* [bp_loc.Z].phys_x = */ -/* number / (HOSTLIST_BASE * HOSTLIST_BASE); */ - - list_push(bp_map_list, bp_map); - - free(bp_id); - } - - if ((rc = bridge_free_bg(bg)) != STATUS_OK) - error("bridge_free_BGL(): %s", rc); - -#endif - _bp_map_initialized = true; - return 1; - -} - static void _new_ba_node(ba_node_t *ba_node, int *coord, bool track_down_nodes) { int i,j; diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.h b/src/plugins/select/bluegene/block_allocator/block_allocator.h index 133654462..36ec351bb 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.h +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.h @@ -58,103 +58,115 @@ enum {X, Y, Z}; /* */ -/** +/* * structure that holds switch path 
information for finding the wiring * path without setting the configuration. * - * - geometry - node location * - dim - Which Axis it is on + * - geometry - node location * - in - ingress port. * - out - egress port. * */ typedef struct { - int geometry[BA_SYSTEM_DIMENSIONS]; int dim; + int geometry[BA_SYSTEM_DIMENSIONS]; int in; int out; } ba_path_switch_t; -/** +/* * structure that holds the configuration settings for each request - * - * - letter - filled in after the request is fulfilled - * - geometry - request size - * - size - node count for request - * - conn_type - MESH or TORUS or SMALL - * - rotate_count - when rotating we keep a count so we aren't in an infinate loop. - * - elongate_count - when elongating we keep a count so we aren't in an infinate loop. - * - rotate - weather to allow rotating or not. - * - elongate - weather to allow elongating or not. - * - force_contig - weather to allow force contiguous or not. - * */ typedef struct { - char *save_name; + bitstr_t *avail_node_bitmap; /* pointer to available nodes */ char *blrtsimage; /* BlrtsImage for this block */ + int conn_type; /* mesh, torus, or small */ + bool elongate; /* whether allow elongation or not */ + int elongate_count; /* place in elongate_geos list + we are at */ + List elongate_geos; /* list of possible shapes of + blocks. contains int* ptrs */ + int geometry[BA_SYSTEM_DIMENSIONS]; /* size of block in geometry */ char *linuximage; /* LinuxImage for this block */ char *mloaderimage; /* mloaderImage for this block */ + int nodecards; /* number of nodecards in + * block, only used for small + * block creation */ + bool passthrough; /* filled in if there are + passthroughs in the block created */ + int procs; /* Number of Real processors in + block */ + int quarters; /* number of midplane quarters in + * block, only used for small + * block creation */ char *ramdiskimage; /* RamDiskImage for this block */ - int geometry[BA_SYSTEM_DIMENSIONS]; - int start[BA_SYSTEM_DIMENSIONS]; - int start_req; - int size; - int procs; - int conn_type; - int rotate_count; - int elongate_count; - int nodecards; - int quarters; - bool passthrough; - bool rotate; - bool elongate; - List elongate_geos; - bitstr_t *avail_node_bitmap; /* pointer to available nodes */ + bool rotate; /* whether allow elongation or not */ + int rotate_count; /* number of times rotated */ + char *save_name; /* name of blocks in midplanes */ + int size; /* count of midplanes in block */ + int start[BA_SYSTEM_DIMENSIONS]; /* where to start creation of + block */ + int start_req; /* state there was a start + request */ } ba_request_t; +/* structure filled in from reading bluegene.conf file for block + * creation */ typedef struct { - char *block; + char *block; /* Hostlist of midplanes in the + block */ + int conn_type; /* mesh, torus, or small */ char *blrtsimage; /* BlrtsImage for this block */ char *linuximage; /* LinuxImage for this block */ char *mloaderimage; /* mloaderImage for this block */ + uint16_t nodecards; /* number of nodecards in + * block, only used for small + * block creation */ + uint16_t quarters; /* number of midplane quarters in + * block, only used for small + * block creation */ char *ramdiskimage; /* RamDiskImage for this block */ - int conn_type; - uint16_t quarters; - uint16_t nodecards; } blockreq_t; +/* structure filled in from reading bluegene.conf file for specifing + * images */ typedef struct { - char *name; - bool def; - List groups; + bool def; /* Whether image is the default + image or not */ + List groups; /* list of 
groups able to use + * the image contains + * image_group_t's */ + char *name; /* Name of image */ } image_t; typedef struct { char *name; gid_t gid; } image_group_t; -/** + +/* * structure that holds the configuration settings for each connection * - * - port_tar - which port the connection is going to - * interanlly - always going to something within the switch. - * exteranlly - always going to the next hop outside the switch. * - node_tar - coords of where the next hop is externally * interanlly - nothing. * exteranlly - location of next hop. + * - port_tar - which port the connection is going to + * interanlly - always going to something within the switch. + * exteranlly - always going to the next hop outside the switch. * - used - weather or not the connection is used. * */ typedef struct { - /* target port */ - int port_tar; - /* target label */ int node_tar[BA_SYSTEM_DIMENSIONS]; + /* target port */ + int port_tar; bool used; } ba_connection_t; -/** + +/* * structure that holds the configuration settings for each switch * which pretty much means the wiring information * - int_wire - keeps details of where the wires are attached @@ -173,23 +185,28 @@ typedef struct * ba_node_t: node within the allocation system. */ typedef struct { - /* set if using this node in a block */ - uint16_t used; - - /* coordinates */ + /* a switch for each dimensions */ + ba_switch_t axis_switch[BA_SYSTEM_DIMENSIONS]; + /* coordinates of midplane */ int coord[BA_SYSTEM_DIMENSIONS]; - ba_switch_t axis_switch[BA_SYSTEM_DIMENSIONS]; - char letter; + /* color of letter used in smap */ int color; + /* midplane index used for easy look up of the miplane */ int index; - int state; + /* letter used in smap */ + char letter; // int phys_x; // no longer needed + int state; + /* set if using this midplane in a block */ + uint16_t used; } ba_node_t; typedef struct { + /* total number of procs on the system */ int num_of_proc; - /* made to hold info about a system, which right now is only a grid of ba_nodes*/ + /* made to hold info about a system, which right now is only a + * grid of ba_nodes*/ #ifdef HAVE_BG ba_node_t ***grid; #else @@ -200,25 +217,32 @@ typedef struct { /* Used to Keep track of where the Base Blocks are at all times Rack and Midplane is the bp_id and XYZ is the coords. */ - typedef struct { char *bp_id; int coord[BA_SYSTEM_DIMENSIONS]; } ba_bp_map_t; /* Global */ -extern List bp_map_list; -extern char letters[62]; -extern char colors[6]; -extern int DIM_SIZE[BA_SYSTEM_DIMENSIONS]; -extern s_p_options_t bg_conf_file_options[]; - +extern List bp_map_list; /* list used for conversion from XYZ to Rack + * midplane */ +extern char letters[62]; /* complete list of letters used in smap */ +extern char colors[6]; /* index into colors used for smap */ +extern int DIM_SIZE[BA_SYSTEM_DIMENSIONS]; /* how many midplanes in + * each dimension */ +extern s_p_options_t bg_conf_file_options[]; /* used to parse the + * bluegene.conf file. 
*/ + +/* Translate a state enum to a readable string */ extern char *bg_block_state_string(rm_partition_state_t state); + +/* Parse a block request from the bluegene.conf file */ extern int parse_blockreq(void **dest, slurm_parser_enum_t type, const char *key, const char *value, const char *line, char **leftover); extern void destroy_blockreq(void *ptr); + +/* Parse imagine information from blugene.conf file */ extern int parse_image(void **dest, slurm_parser_enum_t type, const char *key, const char *value, const char *line, char **leftover); @@ -231,18 +255,34 @@ extern void destroy_ba_node(void *ptr); * create a block request. Note that if the geometry is given, * then size is ignored. If elongate is true, the algorithm will try * to fit that a block of cubic shape and then it will try other - * elongated geometries. (ie, 2x2x2 -> 4x2x1 -> 8x1x1). Note that - * size must be a power of 2, given 3 dimensions. + * elongated geometries. (ie, 2x2x2 -> 4x2x1 -> 8x1x1). * - * OUT - ba_request: structure to allocate and fill in. - * IN - geometry: requested geometry of block - * IN - size: requested size of block - * IN - rotate: if true, allows rotation of block during fit + * IN/OUT - ba_request: structure to allocate and fill in. + * + * ALL below IN's need to be set within the ba_request before the call + * if you want them to be used. + * ALL below OUT's are set and returned within the ba_request. + * IN - avail_node_bitmap: bitmap of usable midplanes. + * IN - blrtsimage: BlrtsImage for this block if not default + * IN - conn_type: connection type of request (TORUS or MESH or SMALL) * IN - elongate: if true, will try to fit different geometries of * same size requests - * IN - contig: enforce contiguous regions constraint - * IN - conn_type: connection type of request (TORUS or MESH or SMALL) - * + * IN/OUT - geometry: requested/returned geometry of block + * IN - linuximage: LinuxImage for this block if not default + * IN - mloaderimage: MLoaderImage for this block if not default + * IN - nodecards: Number of nodecards in each block in request only + * used of small block allocations. + * OUT - passthroughs: if there were passthroughs used in the + * generation of the block. + * IN - procs: Number of real processors requested + * IN - quarters: Number of midplane quarters in each block in request only + * used of small block allocations. + * IN - RamDiskimage: RamDiskImage for this block if not default + * IN - rotate: if true, allows rotation of block during fit + * OUT - save_name: hostlist of midplanes used in block + * IN/OUT - size: requested/returned count of midplanes in block + * IN - start: geo location of where to start the allocation + * IN - start_req: if set use the start variable to start at * return success of allocation/validation of params */ extern int new_ba_request(ba_request_t* ba_request); @@ -267,44 +307,53 @@ extern void print_ba_request(ba_request_t* ba_request); * Initialize internal structures by either reading previous block * configurations from a file or by running the graph solver. * - * IN: dunno yet, probably some stuff denoting downed nodes, etc. + * IN: node_info_msg_t * can be null, + * should be from slurm_load_node(). * - * return: success or error of the intialization. + * return: void. */ -extern void ba_init(); -/* +extern void ba_init(node_info_msg_t *node_info_ptr); + +/* If emulating a system set up a known configuration for wires in a + * system of the size given. 
+ * If a real bluegene system, query the system and get all wiring
+ * information of the system.
+ */
 extern void init_wires();
 
-/**
+
+/*
 * destroy all the internal (global) data structs.
 */
 extern void ba_fini();
 
-/**
- * set the node in the internal configuration as unusable
+/*
+ * set the node in the internal configuration as in, or not in use,
+ * along with the current state of the node.
 *
- * IN ba_node: ba_node_t to put down
+ * IN ba_node: ba_node_t to update state
 * IN state: new state of ba_node_t
 */
 extern void ba_update_node_state(ba_node_t *ba_node, uint16_t state);
 
-/**
- * copy info from a ba_node
+/*
+ * copy info from a ba_node, a direct memcpy of the ba_node_t
 *
 * IN ba_node: node to be copied
- * OUT ba_node_t *: copied info must be freed with destroy_ba_node
+ * Returned ba_node_t *: copied info must be freed with destroy_ba_node
 */
 extern ba_node_t *ba_copy_node(ba_node_t *ba_node);
 
-/**
+/*
 * copy the path of the nodes given
 *
- * IN List of ba_node_t *'s: nodes to be copied
- * OUT List of ba_node_t *'s: filled in list of nodes wiring
+ * IN nodes List of ba_node_t *'s: nodes to be copied
+ * OUT dest_nodes List of ba_node_t *'s: filled in list of nodes
+ * wiring.
+ * Return on success SLURM_SUCCESS, on error SLURM_ERROR
 */
-extern int copy_node_path(List nodes, List dest_nodes);
+extern int copy_node_path(List nodes, List *dest_nodes);
 
-/**
+/*
 * Try to allocate a block.
 *
 * IN - ba_request: allocation request
@@ -316,38 +365,99 @@ extern int allocate_block(ba_request_t* ba_request, List results);
 
-/**
+/*
 * Admin wants to remove a previous allocation.
 * will allow Admin to delete a previous allocation retrival by letter code.
 */
 extern int remove_block(List nodes, int new_count);
 
-/**
+/*
 * Admin wants to change something about a previous allocation.
 * will allow Admin to change previous allocation by giving the
 * letter code for the allocation and the variable to alter
- *
+ * (Not currently used in the system, update this if it is)
 */
 extern int alter_block(List nodes, int conn_type);
 
-/**
+/*
 * After a block is deleted or altered following allocations must
 * be redone to make sure correct path will be used in the real system
- *
+ * (Not currently used in the system, update this if it is)
 */
 extern int redo_block(List nodes, int *geo, int conn_type, int new_count);
 
+/*
+ * Used to set a block into a virtual system. The system can be
+ * cleared first and this function sets all the wires and midplanes
+ * used in the nodelist given. The nodelist is a list of ba_node_t's
+ * that are already set up. This is very handy to test if there are
+ * any passthroughs used by one block when adding another block that
+ * also uses those wires, and neither use any overlapping
+ * midplanes. Doing a simple bitmap & will not reveal this.
+ *
+ * Returns SLURM_SUCCESS if nodelist fits into system without
+ * conflict, and SLURM_ERROR if nodelist conflicts with something
+ * already in the system.
+ */
 extern int check_and_set_node_list(List nodes);
 
+/*
+ * Used to find, and set up midplanes and the wires in the virtual
+ * system and return them in List results
+ *
+ * IN/OUT results - a list with a NULL destroyer filled in with
+ * midplanes and wires set to create the block with the api. If
+ * only interested in the hostlist, NULL can be accepted also.
+ * IN start - where to start the allocation.
+ * IN geometry - the requested geometry of the block.
+ * IN conn_type - mesh, torus, or small.
+ * RET char * - hostlist of midplanes the results represent; must be
+ * xfreed. NULL on failure
+ */
 extern char *set_bg_block(List results, int *start, int *geometry,
			   int conn_type);
 
+/*
+ * Resets the virtual system to a virgin state. If track_down_nodes is set
+ * then those midplanes are not set to idle, but kept in a down state.
+ */
 extern int reset_ba_system(bool track_down_nodes);
 
+
+/*
+ * Used to set all midplanes in a special used state except the ones
+ * we are able to use in a new allocation.
+ *
+ * IN: hostlist of midplanes we do not want
+ * RET: SLURM_SUCCESS on success, or SLURM_ERROR on error
+ *
+ * Note: Need to call reset_all_removed_bps before starting another
+ * allocation attempt.
+ */
 extern int removable_set_bps(char *bps);
 
+
+/*
+ * Resets the virtual system to the previous state before calling
+ * removable_set_bps, or set_all_bps_except.
+ */
 extern int reset_all_removed_bps();
 
+
+/*
+ * IN: hostlist of midplanes we do not want
+ * RET: SLURM_SUCCESS on success, or SLURM_ERROR on error
+ *
+ * Need to call reset_all_removed_bps before starting another
+ * allocation attempt. If possible use removable_set_bps since it is
+ * faster. It does basically the opposite of this function. If you
+ * have to come up with this list though it is faster to use this
+ * function than if you have to call bitmap2node_name since that is slow.
+ */
 extern int set_all_bps_except(char *bps);
 
+/*
+ * set values of every grid point (used in smap)
+ */
 extern void init_grid(node_info_msg_t *node_info_ptr);
 
+
 /*
 * Convert a BG API error code to a string
 * IN inx - error code from any of the BG Bridge APIs
@@ -355,27 +465,27 @@ extern void init_grid(node_info_msg_t *node_info_ptr);
 */
 extern char *bg_err_str(status_t inx);
 
-/**
+/*
 * Set up the map for resolving
 */
 extern int set_bp_map(void);
 
-/**
- * find a base blocks bg location
+/*
+ * find a base block's bg location based on Rack Midplane name R000 not R00-M0
 */
 extern int *find_bp_loc(char* bp_id);
 
-/**
- * find a rack/midplace location
+/*
+ * find a rack/midplane location based on XYZ coords
 */
 extern char *find_bp_rack_mid(char* xyz);
 
-/**
- * set the used wires for a block out of the database
+/*
+ * set the used wires in the virtual system for a block from the real system
 */
 extern int load_block_wiring(char *bg_block_id);
 
-/**
+/*
 * get the used wires for a block out of the database and return the
 * node list
 */
diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c b/src/plugins/select/bluegene/plugin/bg_block_info.c
index 1e2c0ebaf..9c408f5d7 100644
--- a/src/plugins/select/bluegene/plugin/bg_block_info.c
+++ b/src/plugins/select/bluegene/plugin/bg_block_info.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
 * bg_block_info.c - bluegene block information from the db2 database.
 *
- * $Id: bg_block_info.c 10893 2007-01-29 21:53:48Z da $
+ * $Id: bg_block_info.c 14904 2008-08-26 21:19:57Z da $
 *****************************************************************************
 * Copyright (C) 2004-2006 The Regents of the University of California.
 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
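[Editor's note: the block_allocator.h comments above describe a paired call
discipline for removable_set_bps() and reset_all_removed_bps(). A minimal
sketch of that discipline follows; the wrapper name is hypothetical, and the
assumption that allocate_block()'s return value can simply be passed through
is not stated by this patch.]

#include "src/plugins/select/bluegene/block_allocator/block_allocator.h"

static int _allocate_avoiding_bps(ba_request_t *ba_request, List results,
				  char *unusable_bps)
{
	int rc;

	/* mark the midplanes we may not touch as "removed" */
	if (removable_set_bps(unusable_bps) != SLURM_SUCCESS)
		return SLURM_ERROR;

	rc = allocate_block(ba_request, results);

	/* always restore the virtual system before the next attempt,
	 * as the header comment for removable_set_bps() requires */
	(void) reset_all_removed_bps();

	return rc;
}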
@@ -236,7 +236,6 @@ extern int update_block_list() char *name = NULL; bg_record_t *bg_record = NULL; time_t now; - int skipped_dealloc = 0; kill_job_struct_t *freeit = NULL; ListIterator itr = NULL; @@ -305,6 +304,7 @@ extern int update_block_list() } else if(bg_record->job_running != BLOCK_ERROR_STATE //plugin set error && bg_record->state != state) { + int skipped_dealloc = 0; debug("state of Block %s was %d and now is %d", bg_record->bg_block_id, bg_record->state, @@ -319,11 +319,9 @@ extern int update_block_list() bg_record->state = state; - if(bg_record->state == RM_PARTITION_DEALLOCATING) { + if(bg_record->state == RM_PARTITION_DEALLOCATING + || skipped_dealloc) { _block_is_deallocating(bg_record); - } else if(skipped_dealloc) { - _block_is_deallocating(bg_record); - skipped_dealloc = 0; } else if(bg_record->state == RM_PARTITION_CONFIGURING) bg_record->boot_state = 1; updated = 1; @@ -438,7 +436,6 @@ extern int update_freeing_block_list() rm_partition_state_t state; char *name = NULL; bg_record_t *bg_record = NULL; - int skipped_dealloc = 0; ListIterator itr = NULL; if(!bg_freeing_list) @@ -489,14 +486,6 @@ extern int update_freeing_block_list() bg_record->bg_block_id, bg_record->state, state); - /* - check to make sure block went - through freeing correctly - */ - if(bg_record->state - != RM_PARTITION_DEALLOCATING - && state == RM_PARTITION_FREE) - skipped_dealloc = 1; bg_record->state = state; } diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 9f0cea8a6..4bc37379f 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -2,7 +2,7 @@ * bg_job_place.c - blue gene job placement (e.g. base block selection) * functions. * - * $Id: bg_job_place.c 14660 2008-07-30 17:39:47Z jette $ + * $Id: bg_job_place.c 14952 2008-09-03 16:08:14Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -531,6 +531,7 @@ static int _check_for_booted_overlapping_blocks( bg_record_t *found_record = NULL; ListIterator itr = NULL; int rc = 0; + int overlap = 0; /* this test only is for actually picking a block not testing */ if(test_only && bluegene_layout_mode == LAYOUT_DYNAMIC) @@ -549,7 +550,12 @@ static int _check_for_booted_overlapping_blocks( continue; } - if(blocks_overlap(bg_record, found_record)) { + slurm_mutex_lock(&block_state_mutex); + overlap = blocks_overlap(bg_record, found_record); + slurm_mutex_unlock(&block_state_mutex); + + if(overlap) { + overlap = 0; /* make the available time on this block * (bg_record) the max of this found_record's job * or the one already set if in overlapped_block_list diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index 0e70c99fc..20f326d46 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -2,7 +2,7 @@ * bg_job_run.c - blue gene job execution (e.g. initiation and termination) * functions. * - * $Id: bg_job_run.c 14660 2008-07-30 17:39:47Z jette $ + * $Id: bg_job_run.c 14938 2008-08-29 21:49:01Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
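[Editor's note: the bg_job_run.c hunks below repeat the same requeue-or-fail
sequence in three error paths. A sketch of that sequence factored into one
helper; the helper name is hypothetical, while the calls, arguments, and lock
set are taken verbatim from the patch.]

static void _requeue_or_fail(uint32_t job_id)
{
	int rc;
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	/* wait for the slurmd to begin the batch script; a requeue
	 * issued before script initiation would be a no-op */
	sleep(2);

	lock_slurmctld(job_write_lock);
	if ((rc = job_requeue(0, job_id, -1))) {
		error("couldn't requeue job %u, failing it: %s",
		      job_id, slurm_strerror(rc));
		job_fail(job_id);
	}
	unlock_slurmctld(job_write_lock);
}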
@@ -62,6 +62,7 @@ #include "src/common/xstring.h" #include "src/slurmctld/proc_req.h" #include "bluegene.h" +#include "src/slurmctld/locks.h" #define MAX_POLL_RETRIES 220 #define POLL_INTERVAL 3 @@ -185,6 +186,50 @@ static int _remove_job(db_job_id_t job_id) } #endif +/* block_state_mutex should be locked before calling this function */ +static int _reset_block(bg_record_t *bg_record) +{ + int rc = SLURM_SUCCESS; + if(bg_record) { + if(bg_record->job_running > NO_JOB_RUNNING) { + bg_record->job_running = NO_JOB_RUNNING; + bg_record->job_ptr = NULL; + } + /* remove user from list */ + + slurm_conf_lock(); + if(bg_record->target_name) { + if(strcmp(bg_record->target_name, + slurmctld_conf.slurm_user_name)) { + xfree(bg_record->target_name); + bg_record->target_name = + xstrdup(slurmctld_conf. + slurm_user_name); + } + update_block_user(bg_record, 1); + } else { + bg_record->target_name = + xstrdup(slurmctld_conf.slurm_user_name); + } + slurm_conf_unlock(); + + bg_record->boot_state = 0; + bg_record->boot_count = 0; + + last_bg_update = time(NULL); + if(remove_from_bg_list(bg_job_block_list, bg_record) + == SLURM_SUCCESS) { + num_unused_cpus += + bg_record->bp_count*bg_record->cpus_per_bp; + } + } else { + error("No block given to reset"); + rc = SLURM_ERROR; + } + + return rc; +} + /* Delete a bg_update_t record */ static void _bg_list_del(void *x) { @@ -262,7 +307,10 @@ static void _start_agent(bg_update_t *bg_update_ptr) bg_record_t *found_record = NULL; ListIterator itr; List delete_list; - + int requeue_job = 0; + slurmctld_lock_t job_write_lock = { + NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; + slurm_mutex_lock(&job_start_mutex); bg_record = @@ -271,7 +319,20 @@ static void _start_agent(bg_update_t *bg_update_ptr) if(!bg_record) { error("block %s not found in bg_list", bg_update_ptr->bg_block_id); - (void) slurm_fail_job(bg_update_ptr->job_ptr->job_id); + /* wait for the slurmd to begin + the batch script, slurm_fail_job() + is a no-op if issued prior + to the script initiation do clean up just + incase the fail job isn't ran */ + sleep(2); + lock_slurmctld(job_write_lock); + if((rc = job_requeue(0, bg_update_ptr->job_ptr->job_id, -1))) { + error("couldn't requeue job %u, failing it: %s", + bg_update_ptr->job_ptr->job_id, + slurm_strerror(rc)); + job_fail(bg_update_ptr->job_ptr->job_id); + } + unlock_slurmctld(job_write_lock); slurm_mutex_unlock(&job_start_mutex); return; } @@ -306,7 +367,21 @@ static void _start_agent(bg_update_t *bg_update_ptr) bg_record->bg_block_id); continue; } - + + if(found_record->job_ptr) { + error("Trying to start job %u on block %s, " + "but there is a job %u running on an overlapping " + "block %s it will not end until %u. 
" + "This should never happen.", + bg_update_ptr->job_ptr->job_id, + bg_record->bg_block_id, + found_record->job_ptr->job_id, + found_record->bg_block_id, + found_record->job_ptr->end_time); + requeue_job = 1; + break; + } + debug2("need to make sure %s is free, it's part of %s", found_record->bg_block_id, bg_record->bg_block_id); @@ -317,6 +392,33 @@ static void _start_agent(bg_update_t *bg_update_ptr) num_block_to_free++; } list_iterator_destroy(itr); + + if(requeue_job) { + num_block_to_free = 0; + num_block_freed = 0; + list_destroy(delete_list); + + _reset_block(bg_record); + + slurm_mutex_unlock(&block_state_mutex); + /* wait for the slurmd to begin + the batch script, slurm_fail_job() + is a no-op if issued prior + to the script initiation do clean up just + incase the fail job isn't ran */ + sleep(2); + lock_slurmctld(job_write_lock); + if((rc = job_requeue(0, bg_update_ptr->job_ptr->job_id, -1))) { + error("couldn't requeue job %u, failing it: %s", + bg_update_ptr->job_ptr->job_id, + slurm_strerror(rc)); + job_fail(bg_update_ptr->job_ptr->job_id); + } + unlock_slurmctld(job_write_lock); + slurm_mutex_unlock(&job_start_mutex); + return; + } + free_block_list(delete_list); list_destroy(delete_list); slurm_mutex_unlock(&block_state_mutex); @@ -422,20 +524,30 @@ static void _start_agent(bg_update_t *bg_update_ptr) if(bg_record->state == RM_PARTITION_FREE) { if((rc = boot_block(bg_record)) != SLURM_SUCCESS) { - sleep(2); - /* wait for the slurmd to begin - the batch script, slurm_fail_job() - is a no-op if issued prior - to the script initiation do clean up just - incase the fail job isn't ran */ - (void) slurm_fail_job(bg_update_ptr->job_ptr->job_id); slurm_mutex_lock(&block_state_mutex); + _reset_block(bg_record); if (remove_from_bg_list(bg_job_block_list, bg_record) == SLURM_SUCCESS) { num_unused_cpus += bg_record->bp_count *bg_record->cpus_per_bp; } slurm_mutex_unlock(&block_state_mutex); + sleep(2); + /* wait for the slurmd to begin + the batch script, slurm_fail_job() + is a no-op if issued prior + to the script initiation do clean up just + incase the fail job isn't ran */ + lock_slurmctld(job_write_lock); + if((rc = job_requeue( + 0, bg_update_ptr->job_ptr->job_id, -1))) { + error("couldn't requeue job %u, failing it: %s", + bg_update_ptr->job_ptr->job_id, + slurm_strerror(rc)); + job_fail(bg_update_ptr->job_ptr->job_id); + } + lock_slurmctld(job_write_lock); + slurm_mutex_unlock(&job_start_mutex); return; } @@ -609,37 +721,9 @@ static void _term_agent(bg_update_t *bg_update_ptr) } slurm_mutex_lock(&block_state_mutex); - if(bg_record->job_running > NO_JOB_RUNNING) { - bg_record->job_running = NO_JOB_RUNNING; - bg_record->job_ptr = NULL; - } - /* remove user from list */ - - slurm_conf_lock(); - if(bg_record->target_name) { - if(strcmp(bg_record->target_name, - slurmctld_conf.slurm_user_name)) { - xfree(bg_record->target_name); - bg_record->target_name = - xstrdup(slurmctld_conf. 
- slurm_user_name); - } - update_block_user(bg_record, 1); - } else { - bg_record->target_name = - xstrdup(slurmctld_conf.slurm_user_name); - } - slurm_conf_unlock(); - - bg_record->boot_state = 0; - bg_record->boot_count = 0; + + _reset_block(bg_record); - last_bg_update = time(NULL); - if(remove_from_bg_list(bg_job_block_list, bg_record) - == SLURM_SUCCESS) { - num_unused_cpus += - bg_record->bp_count*bg_record->cpus_per_bp; - } slurm_mutex_unlock(&block_state_mutex); } else { diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c index 9d7d8c7d9..20658b791 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -645,7 +645,7 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq) bg_record->bg_block_list = list_create(destroy_ba_node); if(used_nodes) { - if(copy_node_path(used_nodes, bg_record->bg_block_list) + if(copy_node_path(used_nodes, &bg_record->bg_block_list) == SLURM_ERROR) error("couldn't copy the path for the allocation"); bg_record->bp_count = list_count(used_nodes); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index e6cf2017f..95d731690 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.c - blue gene node configuration processing module. * - * $Id: bluegene.c 13924 2008-04-23 06:24:55Z da $ + * $Id: bluegene.c 14952 2008-09-03 16:08:14Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
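[Editor's note: the next hunk documents that blocks_overlap() must be called
with block_state_mutex held, and the bg_job_place.c hunk above updates
_check_for_booted_overlapping_blocks() accordingly. A condensed sketch of
that caller-side pattern; the wrapper name is hypothetical, the locking
matches the patch.]

static bool _blocks_overlap_locked(bg_record_t *rec_a, bg_record_t *rec_b)
{
	bool overlap;

	/* blocks_overlap() now assumes block_state_mutex is held */
	slurm_mutex_lock(&block_state_mutex);
	overlap = blocks_overlap(rec_a, rec_b);
	slurm_mutex_unlock(&block_state_mutex);

	return overlap;
}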
@@ -201,11 +201,15 @@ extern void fini_bg(void) ba_fini(); } +/* + * block_state_mutex should be locked before calling this function + */ extern bool blocks_overlap(bg_record_t *rec_a, bg_record_t *rec_b) { bitstr_t *my_bitmap = NULL; - if(rec_a->bp_count > 1 && rec_a->bp_count > 1) { + if((rec_a->bp_count > 1) && (rec_b->bp_count > 1)) { + /* Test for conflicting passthroughs */ reset_ba_system(false); check_and_set_node_list(rec_a->bg_block_list); if(check_and_set_node_list(rec_b->bg_block_list) @@ -607,6 +611,14 @@ extern void *mult_free_block(void *args) usleep(100000); continue; } + if(bg_record->job_ptr) { + info("We are freeing a block (%s) that " + "has job %u(%u), This should never happen.\n", + bg_record->bg_block_id, + bg_record->job_ptr->job_id, + bg_record->job_running); + term_jobs_on_block(bg_record->bg_block_id); + } debug("freeing the block %s.", bg_record->bg_block_id); bg_free_block(bg_record); debug("done\n"); diff --git a/src/plugins/select/bluegene/plugin/defined_block.c b/src/plugins/select/bluegene/plugin/defined_block.c index f915dfdc3..86a0dd645 100644 --- a/src/plugins/select/bluegene/plugin/defined_block.c +++ b/src/plugins/select/bluegene/plugin/defined_block.c @@ -182,7 +182,7 @@ extern int create_defined_blocks(bg_layout_t overlapped, list_create(destroy_ba_node); copy_node_path( results, - bg_record->bg_block_list); + &bg_record->bg_block_list); list_destroy(results); } } @@ -363,7 +363,7 @@ extern int create_full_system_block(List bg_found_block_list) if(bg_record->bg_block_list) list_destroy(bg_record->bg_block_list); bg_record->bg_block_list = list_create(destroy_ba_node); - copy_node_path(results, bg_record->bg_block_list); + copy_node_path(results, &bg_record->bg_block_list); list_destroy(results); if((rc = configure_block(bg_record)) == SLURM_ERROR) { diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index fdca775d6..433d5705a 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * select_bluegene.c - node selection plugin for Blue Gene system. * - * $Id: select_bluegene.c 14660 2008-07-30 17:39:47Z jette $ + * $Id: select_bluegene.c 14952 2008-09-03 16:08:14Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
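[Editor's note: the bg_record_functions.c and defined_block.c hunks above, and
select_p_state_restore() below, all switch copy_node_path() callers to pass
the destination list by address, matching the prototype change in
block_allocator.h, presumably so the callee can create the list when needed.
A sketch of the new convention; the helper name is hypothetical, the call and
error message follow add_bg_record().]

static int _store_block_path(List results, bg_record_t *bg_record)
{
	/* destination List is now passed by address */
	if (copy_node_path(results, &bg_record->bg_block_list)
	    == SLURM_ERROR) {
		error("couldn't copy the path for the allocation");
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;
}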
@@ -431,6 +431,7 @@ extern int select_p_state_restore(char *dir_name) error("select_p_state_restore: problem unpacking node_info"); goto unpack_error; } + slurm_mutex_lock(&block_state_mutex); reset_ba_system(false); node_bitmap = bit_alloc(node_record_count); @@ -481,11 +482,9 @@ extern int select_p_state_restore(char *dir_name) list_iterator_reset(itr); if(bg_record) { - slurm_mutex_lock(&block_state_mutex); if(bg_info_record->state == RM_PARTITION_ERROR) bg_record->job_running = BLOCK_ERROR_STATE; bg_record->state = bg_info_record->state; - slurm_mutex_unlock(&block_state_mutex); blocks++; } else { int ionodes = 0; @@ -602,7 +601,7 @@ extern int select_p_state_restore(char *dir_name) list_destroy(bg_record->bg_block_list); bg_record->bg_block_list = list_create(destroy_ba_node); - copy_node_path(results, bg_record->bg_block_list); + copy_node_path(results, &bg_record->bg_block_list); list_destroy(results); configure_block(bg_record); @@ -614,7 +613,6 @@ extern int select_p_state_restore(char *dir_name) FREE_NULL_BITMAP(node_bitmap); list_iterator_destroy(itr); - slurm_mutex_lock(&block_state_mutex); sort_bg_record_inc_size(bg_list); slurm_mutex_unlock(&block_state_mutex); diff --git a/src/plugins/select/bluegene/plugin/slurm_prolog.c b/src/plugins/select/bluegene/plugin/slurm_prolog.c index af652b787..f4ad1d020 100644 --- a/src/plugins/select/bluegene/plugin/slurm_prolog.c +++ b/src/plugins/select/bluegene/plugin/slurm_prolog.c @@ -176,10 +176,11 @@ static int _get_job_size(uint32_t job_id) } /* - * Test if any BG blocks are in deallocating state + * Test if any BG blocks are in deallocating state since they are + * probably related to this job we will want to sleep longer * RET 1: deallocate in progress * 0: no deallocate in progress - * -1: error occurred + * -1: error occurred */ static int _partitions_dealloc() { diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index b5a132e06..79b595cdb 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -2,7 +2,7 @@ * select_cons_res.c - node selection plugin supporting consumable * resources policies. 
* - * $Id: select_cons_res.c 14873 2008-08-25 18:19:31Z jette $ + * $Id: select_cons_res.c 14907 2008-08-26 22:27:26Z jette $ *****************************************************************************\ * * The following example below illustrates how four jobs are allocated @@ -161,7 +161,8 @@ static uint32_t last_verified_job_id = 0; static void _cr_job_list_del(void *x); static int _cr_job_list_sort(void *x, void *y); -static struct node_cr_record *_dup_node_cr(struct node_cr_record *node_cr_ptr); +static struct node_cr_record *_dup_node_cr(struct node_cr_record *node_cr_ptr, + int node_cr_cnt); static int _job_test(struct job_record *job_ptr, bitstr_t *bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes, int mode, @@ -222,43 +223,55 @@ static void _dump_state(struct node_cr_record *select_node_ptr) } #endif +/* Create a duplicate part_cr_record structure */ +static struct part_cr_record *_dup_part_cr(struct node_cr_record *node_cr_ptr) +{ + int i, j, part_cnt; + struct part_cr_record *part_cr_ptr, *new_part_cr_ptr; + + part_cnt = node_cr_ptr->node_ptr->part_cnt; + new_part_cr_ptr = xmalloc(sizeof(struct part_cr_record) * part_cnt); + part_cr_ptr = node_cr_ptr->parts; + for (i=0; i<part_cnt; i++) { + if (!part_cr_ptr) + break; + new_part_cr_ptr[i].part_ptr = part_cr_ptr->part_ptr; + new_part_cr_ptr[i].num_rows = part_cr_ptr->num_rows; + j = sizeof(uint16_t) * part_cr_ptr->num_rows * + select_node_ptr->sockets; + new_part_cr_ptr[i].alloc_cores = xmalloc(j); + memcpy(new_part_cr_ptr[i].alloc_cores, + part_cr_ptr->alloc_cores, j); + if (i > 0) + new_part_cr_ptr[i-1].next = &new_part_cr_ptr[i]; + part_cr_ptr = part_cr_ptr->next; + } + return new_part_cr_ptr; +} + /* Create a duplicate node_cr_records structure */ -static struct node_cr_record *_dup_node_cr(struct node_cr_record *node_cr_ptr) +static struct node_cr_record *_dup_node_cr(struct node_cr_record *node_cr_ptr, + int node_cr_cnt) { - int i, j; + int i; struct node_cr_record *new_node_cr_ptr; - struct part_cr_record *part_cr_ptr, *new_part_cr_ptr; if (node_cr_ptr == NULL) return NULL; - new_node_cr_ptr = xmalloc(select_node_cnt * - sizeof(struct node_cr_record)); + new_node_cr_ptr = xmalloc(sizeof(struct node_cr_record) * + node_cr_cnt); - for (i=0; i<select_node_cnt; i++) { - new_node_cr_ptr[i].node_ptr = select_node_ptr[i].node_ptr; - new_node_cr_ptr[i].cpus = select_node_ptr[i].cpus; - new_node_cr_ptr[i].sockets = select_node_ptr[i].sockets; - new_node_cr_ptr[i].cores = select_node_ptr[i].cores; - new_node_cr_ptr[i].threads = select_node_ptr[i].threads; - new_node_cr_ptr[i].real_memory = select_node_ptr[i].real_memory; - new_node_cr_ptr[i].alloc_memory = select_node_ptr[i].alloc_memory; - new_node_cr_ptr[i].node_state = select_node_ptr[i].node_state; - - part_cr_ptr = select_node_ptr[i].parts; - while (part_cr_ptr) { - new_part_cr_ptr = xmalloc(sizeof(struct part_cr_record)); - new_part_cr_ptr->part_ptr = part_cr_ptr->part_ptr; - new_part_cr_ptr->num_rows = part_cr_ptr->num_rows; - j = sizeof(uint16_t) * part_cr_ptr->num_rows * - select_node_ptr[i].sockets; - new_part_cr_ptr->alloc_cores = xmalloc(j); - memcpy(new_part_cr_ptr->alloc_cores, - part_cr_ptr->alloc_cores, j); - new_part_cr_ptr->next = new_node_cr_ptr[i].parts; - new_node_cr_ptr[i].parts = new_part_cr_ptr; - part_cr_ptr = part_cr_ptr->next; - } + for (i=0; i<node_cr_cnt; i++) { + new_node_cr_ptr[i].node_ptr = node_cr_ptr[i].node_ptr; + new_node_cr_ptr[i].cpus = node_cr_ptr[i].cpus; + new_node_cr_ptr[i].sockets = node_cr_ptr[i].sockets; + 
new_node_cr_ptr[i].cores = node_cr_ptr[i].cores; + new_node_cr_ptr[i].threads = node_cr_ptr[i].threads; + new_node_cr_ptr[i].real_memory = node_cr_ptr[i].real_memory; + new_node_cr_ptr[i].alloc_memory = node_cr_ptr[i].alloc_memory; + new_node_cr_ptr[i].node_state = node_cr_ptr[i].node_state; + new_node_cr_ptr[i].parts = _dup_part_cr(&node_cr_ptr[i]); } return new_node_cr_ptr; } @@ -803,11 +816,14 @@ static uint16_t _count_idle_cpus(struct node_cr_record *this_node) static int _synchronize_bitmaps(bitstr_t ** partially_idle_bitmap) { - int rc = SLURM_SUCCESS; int size, i, idlecpus = bit_set_count(avail_node_bitmap); size = bit_size(avail_node_bitmap); bitstr_t *bitmap = bit_alloc(size); + *partially_idle_bitmap = bitmap; + if (bitmap == NULL) + return SLURM_ERROR; + debug3("cons_res: synch_bm: size avail %d (%d set) size idle %d ", size, idlecpus, bit_size(idle_node_bitmap)); @@ -827,10 +843,7 @@ static int _synchronize_bitmaps(bitstr_t ** partially_idle_bitmap) idlecpus = bit_set_count(bitmap); debug3("cons_res: synch found %d partially idle nodes", idlecpus); - *partially_idle_bitmap = bitmap; - if (rc != SLURM_SUCCESS) - FREE_NULL_BITMAP(bitmap); - return rc; + return SLURM_SUCCESS; } /* allocate resources to the given job @@ -2301,7 +2314,7 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, /* Job is still pending. Simulate termination of jobs one at a time * to determine when and where the job can start. */ - exp_node_cr = _dup_node_cr(select_node_ptr); + exp_node_cr = _dup_node_cr(select_node_ptr, select_node_cnt); if (exp_node_cr == NULL) { bit_free(orig_map); return SLURM_ERROR; @@ -2350,7 +2363,7 @@ static int _will_run_test(struct job_record *job_ptr, bitstr_t *bitmap, } list_iterator_destroy(job_iterator); list_destroy(cr_job_list); - _destroy_node_part_array(exp_node_cr); + _xfree_select_nodes(exp_node_cr, select_node_cnt); bit_free(orig_map); return rc; } diff --git a/src/sacct/options.c b/src/sacct/options.c index ed584204d..b7c2737fc 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * options.c - option functions for sacct - * - * $Id: options.c 7541 2006-03-18 01:44:58Z da $ ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Danny Auble <da@llnl.gov>. * LLNL-CODE-402394. @@ -563,7 +562,8 @@ int get_data(void) job_cond->usage_end = params.opt_end; job_cond->userid_list = params.opt_uid_list; - jobs = jobacct_storage_g_get_jobs_cond(acct_db_conn, job_cond); + jobs = jobacct_storage_g_get_jobs_cond(acct_db_conn, getuid(), + job_cond); destroy_acct_job_cond(job_cond); } @@ -752,19 +752,12 @@ void parse_command_line(int argc, char **argv) if(params.opt_stat) xfree(params.opt_field_list); - params.opt_field_list = - xrealloc(params.opt_field_list, - (params.opt_field_list==NULL? 
0 : - strlen(params.opt_field_list)) + - strlen(optarg) + 1); - strcat(params.opt_field_list, optarg); - strcat(params.opt_field_list, ","); + xstrfmtcat(params.opt_field_list, "%s,", optarg); break; case 'f': - params.opt_filein = - xrealloc(params.opt_filein, strlen(optarg)+1); - strcpy(params.opt_filein, optarg); + xfree(params.opt_filein); + params.opt_filein = xstrdup(optarg); break; case 'g': @@ -822,10 +815,8 @@ void parse_command_line(int argc, char **argv) break; case 'S': if(!params.opt_field_list) { - params.opt_field_list = - xmalloc(sizeof(STAT_FIELDS)+1); - strcat(params.opt_field_list, STAT_FIELDS); - strcat(params.opt_field_list, ","); + xstrfmtcat(params.opt_field_list, "%s,", + STAT_FIELDS); } params.opt_stat = 1; break; @@ -860,18 +851,8 @@ void parse_command_line(int argc, char **argv) break; case 'V': - { - char obuf[20]; /* should be long enough */ - char *rev="$Revision: 7267 $"; - char *s; - - s=strstr(rev, " ")+1; - for (i=0; s[i]!=' '; i++) - obuf[i]=s[i]; - obuf[i] = 0; - printf("%s: %s\n", argv[0], obuf); + printf("%s %s\n", PACKAGE, SLURM_VERSION); exit(0); - } case ':': case '?': /* getopt() has explained it */ @@ -1056,13 +1037,7 @@ void parse_command_line(int argc, char **argv) else dot = BRIEF_FIELDS; - params.opt_field_list = - xrealloc(params.opt_field_list, - (params.opt_field_list==NULL? 0 : - sizeof(params.opt_field_list)) + - strlen(dot)+1); - xstrcat(params.opt_field_list, dot); - xstrcat(params.opt_field_list, ","); + xstrfmtcat(params.opt_field_list, "%s,", dot); } if(long_output) { @@ -1070,14 +1045,8 @@ void parse_command_line(int argc, char **argv) dot = LONG_COMP_FIELDS; else dot = LONG_FIELDS; - - params.opt_field_list = - xrealloc(params.opt_field_list, - (params.opt_field_list==NULL? 0 : - strlen(params.opt_field_list)) + - strlen(dot)+1); - xstrcat(params.opt_field_list, dot); - xstrcat(params.opt_field_list, ","); + + xstrfmtcat(params.opt_field_list, "%s,", dot); } if (params.opt_field_list==NULL) { @@ -1087,8 +1056,8 @@ void parse_command_line(int argc, char **argv) dot = DEFAULT_COMP_FIELDS; else dot = DEFAULT_FIELDS; - params.opt_field_list = xstrdup(dot); - xstrcat(params.opt_field_list, ","); + + xstrfmtcat(params.opt_field_list, "%s,", dot); } start = params.opt_field_list; diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c index 8a4aa3797..725d9ca98 100644 --- a/src/sacctmgr/account_functions.c +++ b/src/sacctmgr/account_functions.c @@ -127,7 +127,7 @@ static int _set_cond(int *start, int argc, char *argv[], if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } addto_qos_char_list(acct_cond->qos_list, qos_list, @@ -224,7 +224,7 @@ static int _set_rec(int *start, int argc, char *argv[], if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } if(end > 2 && argv[i][end-1] == '=' && (argv[i][end-2] == '+' @@ -255,6 +255,38 @@ static int _set_rec(int *start, int argc, char *argv[], return 0; } +static int _isdefault(List acct_list) +{ + int rc = 0; + acct_user_cond_t user_cond; + List ret_list = NULL; + + if(!acct_list || !list_count(acct_list)) + return rc; + + memset(&user_cond, 0, sizeof(acct_user_cond_t)); + user_cond.def_acct_list = acct_list; + + ret_list = acct_storage_g_get_users(db_conn, my_uid, &user_cond); + if(ret_list && list_count(ret_list)) { + ListIterator itr = list_iterator_create(ret_list); + acct_user_rec_t *user = NULL; + fprintf(stderr," Users listed below have these " + "as their Default Accounts.\n"); 
+ while((user = list_next(itr))) { + fprintf(stderr, " User - %-10.10s Account - %s\n", + user->name, user->default_acct); + } + list_iterator_destroy(itr); + rc = 1; + } + + if(ret_list) + list_destroy(ret_list); + + return rc; +} + extern int sacctmgr_add_account(int argc, char *argv[]) { int rc = SLURM_SUCCESS; @@ -335,7 +367,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } addto_qos_char_list(add_qos_list, qos_list, argv[i]+end, option); @@ -364,7 +396,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) account_cond.assoc_cond = &assoc_cond; local_account_list = acct_storage_g_get_accounts( - db_conn, &account_cond); + db_conn, my_uid, &account_cond); } if(!local_account_list) { @@ -386,7 +418,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) List temp_list = NULL; acct_cluster_rec_t *cluster_rec = NULL; - temp_list = acct_storage_g_get_clusters(db_conn, NULL); + temp_list = acct_storage_g_get_clusters(db_conn, my_uid, NULL); if(!cluster_list) { exit_code=1; fprintf(stderr, @@ -428,7 +460,8 @@ extern int sacctmgr_add_account(int argc, char *argv[]) memset(&cluster_cond, 0, sizeof(acct_cluster_cond_t)); cluster_cond.cluster_list = cluster_list; - temp_list = acct_storage_g_get_clusters(db_conn, &cluster_cond); + temp_list = acct_storage_g_get_clusters(db_conn, my_uid, + &cluster_cond); itr_c = list_iterator_create(cluster_list); itr = list_iterator_create(temp_list); @@ -477,7 +510,7 @@ extern int sacctmgr_add_account(int argc, char *argv[]) assoc_cond.cluster_list = cluster_list; local_assoc_list = acct_storage_g_get_associations( - db_conn, &assoc_cond); + db_conn, my_uid, &assoc_cond); list_destroy(assoc_cond.acct_list); if(!local_assoc_list) { exit_code=1; @@ -866,7 +899,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) return SLURM_ERROR; } - acct_list = acct_storage_g_get_accounts(db_conn, acct_cond); + acct_list = acct_storage_g_get_accounts(db_conn, my_uid, acct_cond); destroy_acct_account_cond(acct_cond); if(!acct_list) { @@ -969,6 +1002,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( @@ -983,6 +1017,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( @@ -1104,6 +1139,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( @@ -1117,6 +1153,7 @@ extern int sacctmgr_list_account(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( @@ -1356,7 +1393,23 @@ extern int sacctmgr_delete_account(int argc, char *argv[]) if(ret_list && list_count(ret_list)) { char *object = NULL; - ListIterator itr = list_iterator_create(ret_list); + ListIterator itr = NULL; + + /* Check to see if person is trying to remove a default + * account of a user. 
+ */ + if(_isdefault(ret_list)) { + exit_code=1; + fprintf(stderr, " Please either remove accounts listed " + "above from list and resubmit,\n" + " or change these users default account to " + "remove the account(s).\n" + " Changes Discarded\n"); + list_destroy(ret_list); + acct_storage_g_commit(db_conn, 0); + return SLURM_ERROR; + } + itr = list_iterator_create(ret_list); if(set == 1) { printf(" Deleting accounts...\n"); } else if(set == 2 || set == 3) { diff --git a/src/sacctmgr/association_functions.c b/src/sacctmgr/association_functions.c index 90b739a2e..08cf98a07 100644 --- a/src/sacctmgr/association_functions.c +++ b/src/sacctmgr/association_functions.c @@ -478,7 +478,8 @@ extern int sacctmgr_list_association(int argc, char *argv[]) return SLURM_ERROR; } - assoc_list = acct_storage_g_get_associations(db_conn, assoc_cond); + assoc_list = acct_storage_g_get_associations(db_conn, my_uid, + assoc_cond); destroy_acct_association_cond(assoc_cond); if(!assoc_list) { diff --git a/src/sacctmgr/cluster_functions.c b/src/sacctmgr/cluster_functions.c index 1eeea8540..2d4efcf4e 100644 --- a/src/sacctmgr/cluster_functions.c +++ b/src/sacctmgr/cluster_functions.c @@ -38,6 +38,7 @@ \*****************************************************************************/ #include "src/sacctmgr/sacctmgr.h" +#include "src/common/uid.h" static int _set_cond(int *start, int argc, char *argv[], List cluster_list, @@ -206,7 +207,8 @@ extern int sacctmgr_add_cluster(int argc, char *argv[]) memset(&cluster_cond, 0, sizeof(acct_cluster_cond_t)); cluster_cond.cluster_list = name_list; - temp_list = acct_storage_g_get_clusters(db_conn, &cluster_cond); + temp_list = acct_storage_g_get_clusters(db_conn, my_uid, + &cluster_cond); if(!temp_list) { exit_code=1; fprintf(stderr, @@ -426,7 +428,8 @@ extern int sacctmgr_list_cluster(int argc, char *argv[]) return SLURM_ERROR; } - cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); + cluster_list = acct_storage_g_get_clusters(db_conn, my_uid, + cluster_cond); destroy_acct_cluster_cond(cluster_cond); if(!cluster_list) { @@ -707,6 +710,7 @@ extern int sacctmgr_delete_cluster(int argc, char *argv[]) extern int sacctmgr_dump_cluster (int argc, char *argv[]) { acct_user_cond_t user_cond; + acct_user_rec_t *user = NULL; acct_association_cond_t assoc_cond; List assoc_list = NULL; List acct_list = NULL; @@ -714,9 +718,38 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) List sacctmgr_assoc_list = NULL; char *cluster_name = NULL; char *file_name = NULL; + char *user_name = NULL; int i; FILE *fd = NULL; + memset(&user_cond, 0, sizeof(acct_user_cond_t)); + user_cond.with_coords = 1; + + user_list = acct_storage_g_get_users(db_conn, my_uid, &user_cond); + /* make sure this person running is an admin */ + user_name = uid_to_string(my_uid); + if(!(user = sacctmgr_find_user_from_list(user_list, user_name))) { + exit_code=1; + fprintf(stderr, " Your uid (%u) is not in the " + "accounting system, can't dump cluster.\n", my_uid); + xfree(user_name); + if(user_list) + list_destroy(user_list); + return SLURM_ERROR; + + } else { + if(user->admin_level < ACCT_ADMIN_SUPER_USER) { + exit_code=1; + fprintf(stderr, " Your user does not have sufficient " + "privileges to dump clusters.\n"); + if(user_list) + list_destroy(user_list); + xfree(user_name); + return SLURM_ERROR; + } + } + xfree(user_name); + for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); if(!end) { @@ -768,7 +801,8 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) assoc_cond.cluster_list = 
list_create(NULL); list_append(assoc_cond.cluster_list, cluster_name); - assoc_list = acct_storage_g_get_associations(db_conn, &assoc_cond); + assoc_list = acct_storage_g_get_associations(db_conn, my_uid, + &assoc_cond); list_destroy(assoc_cond.cluster_list); if(!assoc_list) { @@ -779,18 +813,14 @@ extern int sacctmgr_dump_cluster (int argc, char *argv[]) } else if(!list_count(assoc_list)) { exit_code=1; fprintf(stderr, " Cluster %s returned nothing.", cluster_name); + list_destroy(assoc_list); xfree(cluster_name); return SLURM_ERROR; } sacctmgr_assoc_list = sacctmgr_get_hierarchical_list(assoc_list); - memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.with_coords = 1; - - user_list = acct_storage_g_get_users(db_conn, &user_cond); - - acct_list = acct_storage_g_get_accounts(db_conn, NULL); + acct_list = acct_storage_g_get_accounts(db_conn, my_uid, NULL); fd = fopen(file_name, "w"); diff --git a/src/sacctmgr/common.c b/src/sacctmgr/common.c index 0f5d2db96..53a104083 100644 --- a/src/sacctmgr/common.c +++ b/src/sacctmgr/common.c @@ -247,7 +247,8 @@ extern acct_association_rec_t *sacctmgr_find_association(char *user, else list_append(assoc_cond.partition_list, ""); - assoc_list = acct_storage_g_get_associations(db_conn, &assoc_cond); + assoc_list = acct_storage_g_get_associations(db_conn, my_uid, + &assoc_cond); list_destroy(assoc_cond.acct_list); list_destroy(assoc_cond.cluster_list); @@ -287,7 +288,8 @@ extern acct_association_rec_t *sacctmgr_find_account_base_assoc(char *account, // info("looking for %s %s in %d", account, cluster, // list_count(sacctmgr_association_list)); - assoc_list = acct_storage_g_get_associations(db_conn, &assoc_cond); + assoc_list = acct_storage_g_get_associations(db_conn, my_uid, + &assoc_cond); list_destroy(assoc_cond.acct_list); list_destroy(assoc_cond.cluster_list); @@ -322,7 +324,8 @@ extern acct_user_rec_t *sacctmgr_find_user(char *name) list_append(assoc_cond.user_list, name); user_cond.assoc_cond = &assoc_cond; - user_list = acct_storage_g_get_users(db_conn, &user_cond); + user_list = acct_storage_g_get_users(db_conn, my_uid, + &user_cond); list_destroy(assoc_cond.user_list); @@ -350,7 +353,8 @@ extern acct_account_rec_t *sacctmgr_find_account(char *name) list_append(assoc_cond.acct_list, name); account_cond.assoc_cond = &assoc_cond; - account_list = acct_storage_g_get_accounts(db_conn, &account_cond); + account_list = acct_storage_g_get_accounts(db_conn, my_uid, + &account_cond); list_destroy(assoc_cond.acct_list); @@ -375,7 +379,8 @@ extern acct_cluster_rec_t *sacctmgr_find_cluster(char *name) cluster_cond.cluster_list = list_create(NULL); list_append(cluster_cond.cluster_list, name); - cluster_list = acct_storage_g_get_clusters(db_conn, &cluster_cond); + cluster_list = acct_storage_g_get_clusters(db_conn, my_uid, + &cluster_cond); list_destroy(cluster_cond.cluster_list); @@ -724,7 +729,7 @@ extern char *get_qos_complete_str(List qos_list, List num_qos_list) if(!qos_list || !list_count(qos_list) || !num_qos_list || !list_count(num_qos_list)) - return xstrdup("normal"); + return xstrdup(""); temp_list = list_create(NULL); @@ -747,7 +752,7 @@ extern char *get_qos_complete_str(List qos_list, List num_qos_list) list_destroy(temp_list); if(!print_this) - return xstrdup("normal"); + return xstrdup(""); return print_this; } diff --git a/src/sacctmgr/file_functions.c b/src/sacctmgr/file_functions.c index f85e6198e..412e79a43 100644 --- a/src/sacctmgr/file_functions.c +++ b/src/sacctmgr/file_functions.c @@ -38,6 +38,7 @@ 
\*****************************************************************************/ #include "src/sacctmgr/sacctmgr.h" +#include "src/common/uid.h" typedef struct { acct_admin_level_t admin; @@ -330,7 +331,7 @@ static sacctmgr_file_opts_t *_parse_options(char *options) if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } if(end > 2 && sub[end-1] == '=' && (sub[end-2] == '+' @@ -1296,7 +1297,7 @@ static int _print_file_sacctmgr_assoc_childern(FILE *fd, if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, + db_conn, my_uid, NULL); } temp_char = get_qos_complete_str( @@ -1439,10 +1440,14 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) char line[BUFFER_SIZE]; FILE *fd = NULL; char *parent = NULL; + char *file_name = NULL; char *cluster_name = NULL; + char *user_name = NULL; char object[25]; int start = 0, len = 0, i = 0; int lc=0, num_lines=0; + int start_clean=0; + int cluster_name_set=0; int rc = SLURM_SUCCESS; sacctmgr_file_opts_t *file_opts = NULL; @@ -1475,19 +1480,92 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) int set = 0; - fd = fopen(argv[0], "r"); + if(readonly_flag) { + exit_code = 1; + fprintf(stderr, "Can't run this command in readonly mode.\n"); + return; + } + + /* reset the connection to get the most recent stuff */ + acct_storage_g_commit(db_conn, 0); + + memset(&user_cond, 0, sizeof(acct_user_cond_t)); + user_cond.with_coords = 1; + curr_user_list = acct_storage_g_get_users(db_conn, my_uid, &user_cond); + + /* make sure this person running is an admin */ + user_name = uid_to_string(my_uid); + if(!(user = sacctmgr_find_user_from_list(curr_user_list, user_name))) { + exit_code=1; + fprintf(stderr, " Your uid (%u) is not in the " + "accounting system, can't load file.\n", my_uid); + if(curr_user_list) + list_destroy(curr_user_list); + xfree(user_name); + return; + + } else { + if(user->admin_level < ACCT_ADMIN_SUPER_USER) { + exit_code=1; + fprintf(stderr, " Your user does not have sufficient " + "privileges to load files.\n"); + if(curr_user_list) + list_destroy(curr_user_list); + xfree(user_name); + return; + } + } + xfree(user_name); + + for (i=0; i<argc; i++) { + int end = parse_option_end(argv[i]); + + if(!end && !strncasecmp(argv[i], "clean", 3)) { + start_clean = 1; + } else if(!end || !strncasecmp (argv[i], "File", 1)) { + if(file_name) { + exit_code=1; + fprintf(stderr, + " File name already set to %s\n", + file_name); + continue; + } + file_name = xstrdup(argv[i]+end); + } else if (!strncasecmp (argv[i], "Cluster", 3)) { + if(cluster_name) { + exit_code=1; + fprintf(stderr, + " Can only do one cluster at a time. 
" + "Already doing %s\n", cluster_name); + continue; + } + cluster_name = xstrdup(argv[i]+end); + cluster_name_set = 1; + } else { + exit_code=1; + fprintf(stderr, " Unknown option: %s\n", argv[i]); + } + } + + if(!file_name) { + exit_code=1; + xfree(cluster_name); + fprintf(stderr, + " No filename given, specify one with file=''\n"); + return; + + } + + fd = fopen(file_name, "r"); + xfree(file_name); if (fd == NULL) { exit_code=1; fprintf(stderr, " Unable to read \"%s\": %m\n", argv[0]); + xfree(cluster_name); return; } - curr_acct_list = acct_storage_g_get_accounts(db_conn, NULL); - curr_cluster_list = acct_storage_g_get_clusters(db_conn, NULL); - - memset(&user_cond, 0, sizeof(acct_user_cond_t)); - user_cond.with_coords = 1; - curr_user_list = acct_storage_g_get_users(db_conn, &user_cond); + curr_acct_list = acct_storage_g_get_accounts(db_conn, my_uid, NULL); /* These are new info so they need to be freed here */ acct_list = list_create(destroy_acct_account_rec); @@ -1499,8 +1577,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) mod_user_list = list_create(destroy_acct_user_rec); mod_assoc_list = list_create(destroy_acct_association_rec); - format_list = list_create(slurm_destroy_char); - + format_list = list_create(slurm_destroy_char); while((num_lines = _get_next_line(line, BUFFER_SIZE, fd)) > 0) { lc += num_lines; @@ -1540,7 +1617,6 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) object, lc); rc = SLURM_ERROR; break; - } start++; @@ -1548,7 +1624,7 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) || !strcasecmp("Cluster", object)) { acct_association_cond_t assoc_cond; - if(cluster_name) { + if(cluster_name && !cluster_name_set) { exit_code=1; fprintf(stderr, " You can only add one cluster " "at a time.\n"); @@ -1565,7 +1641,44 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) rc = SLURM_ERROR; break; } - cluster_name = xstrdup(file_opts->name); + + if(!cluster_name_set) + cluster_name = xstrdup(file_opts->name); + if(start_clean) { + acct_cluster_cond_t cluster_cond; + List ret_list = NULL; + + if(!commit_check("You requested to flush " + "the cluster before " + "adding it again.\n" + "Are you sure you want " + "to continue?")) { + printf("Aborted\n"); + break; + } + + memset(&cluster_cond, 0, + sizeof(acct_cluster_cond_t)); + cluster_cond.cluster_list = list_create(NULL); + list_append(cluster_cond.cluster_list, + cluster_name); + + notice_thread_init(); + ret_list = acct_storage_g_remove_clusters( + db_conn, my_uid, &cluster_cond); + notice_thread_fini(); + list_destroy(cluster_cond.cluster_list); + + if(!ret_list) { + exit_code=1; + fprintf(stderr, " There was a problem " + "removing the cluster.\n"); + rc = SLURM_ERROR; + break; + } + } + curr_cluster_list = acct_storage_g_get_clusters( + db_conn, my_uid, NULL); if(cluster_name) info("For cluster %s", cluster_name); @@ -1609,9 +1722,10 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) memset(&assoc_cond, 0, sizeof(acct_association_cond_t)); assoc_cond.cluster_list = list_create(NULL); + assoc_cond.without_parent_limits = 1; list_append(assoc_cond.cluster_list, cluster_name); curr_assoc_list = acct_storage_g_get_associations( - db_conn, &assoc_cond); + db_conn, my_uid, &assoc_cond); list_destroy(assoc_cond.cluster_list); if(!curr_assoc_list) { @@ -2030,8 +2144,9 @@ extern void load_sacctmgr_cfg_file (int argc, char *argv[]) set = 1; } END_TIMER2("add cluster"); - - info("Done adding cluster in %s", TIME_STR); + + if(set) + info("Done adding cluster in %s", 
TIME_STR); if(rc == SLURM_SUCCESS) { if(set) { diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c index 95a466192..f455f1d25 100644 --- a/src/sacctmgr/qos_functions.c +++ b/src/sacctmgr/qos_functions.c @@ -174,7 +174,7 @@ extern int sacctmgr_add_qos(int argc, char *argv[]) } - local_qos_list = acct_storage_g_get_qos(db_conn, NULL); + local_qos_list = acct_storage_g_get_qos(db_conn, my_uid, NULL); if(!local_qos_list) { exit_code=1; @@ -316,7 +316,7 @@ extern int sacctmgr_list_qos(int argc, char *argv[]) list_destroy(print_fields_list); return SLURM_ERROR; } - qos_list = acct_storage_g_get_qos(db_conn, qos_cond); + qos_list = acct_storage_g_get_qos(db_conn, my_uid, qos_cond); destroy_acct_qos_cond(qos_cond); if(!qos_list) { diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index 76eb21cc0..98cdb9335 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -171,7 +171,10 @@ main (int argc, char *argv[]) log_alter(opts, 0, NULL); } - db_conn = acct_storage_g_get_connection(false, rollback_flag); + /* always do a rollback. If you don't then if there is an + * error you can not rollback ;) + */ + db_conn = acct_storage_g_get_connection(false, 1); my_uid = getuid(); if (input_field_count) @@ -480,6 +483,9 @@ static void _add_it (int argc, char *argv[]) return; } + /* reset the connection to get the most recent stuff */ + acct_storage_g_commit(db_conn, 0); + /* First identify the entity to add */ if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_add_account((argc - 1), &argv[1]); @@ -515,6 +521,9 @@ static void _show_it (int argc, char *argv[]) { int error_code = SLURM_SUCCESS; + /* reset the connection to get the most recent stuff */ + acct_storage_g_commit(db_conn, 0); + /* First identify the entity to list */ if (strncasecmp (argv[0], "Account", 2) == 0) { error_code = sacctmgr_list_account((argc - 1), &argv[1]); @@ -558,6 +567,9 @@ static void _modify_it (int argc, char *argv[]) return; } + /* reset the connection to get the most recent stuff */ + acct_storage_g_commit(db_conn, 0); + /* First identify the entity to modify */ if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_modify_account((argc - 1), &argv[1]); @@ -593,6 +605,9 @@ static void _delete_it (int argc, char *argv[]) return; } + /* reset the connection to get the most recent stuff */ + acct_storage_g_commit(db_conn, 0); + /* First identify the entity to delete */ if (strncasecmp (argv[0], "Account", 1) == 0) { error_code = sacctmgr_delete_account((argc - 1), &argv[1]); diff --git a/src/sacctmgr/txn_functions.c b/src/sacctmgr/txn_functions.c index 17ea28bd8..e1ddb2902 100644 --- a/src/sacctmgr/txn_functions.c +++ b/src/sacctmgr/txn_functions.c @@ -185,7 +185,7 @@ extern int sacctmgr_list_txn(int argc, char *argv[]) return SLURM_ERROR; } - txn_list = acct_storage_g_get_txn(db_conn, txn_cond); + txn_list = acct_storage_g_get_txn(db_conn, my_uid, txn_cond); destroy_acct_txn_cond(txn_cond); if(!txn_list) { diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c index 1b5c9fb19..66d428961 100644 --- a/src/sacctmgr/user_functions.c +++ b/src/sacctmgr/user_functions.c @@ -37,6 +37,7 @@ \*****************************************************************************/ #include "src/sacctmgr/sacctmgr.h" +#include "src/common/uid.h" static int _set_cond(int *start, int argc, char *argv[], acct_user_cond_t *user_cond, @@ -138,7 +139,7 @@ static int _set_cond(int *start, int argc, char *argv[], if(!qos_list) { qos_list = 
acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } addto_qos_char_list(user_cond->qos_list, qos_list, @@ -245,7 +246,7 @@ static int _set_rec(int *start, int argc, char *argv[], if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } if(end > 2 && argv[i][end-1] == '=' @@ -310,7 +311,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) int limit_set = 0, mins; int first = 1; int acct_first = 1; - + /* if(!list_count(sacctmgr_cluster_list)) { */ /* printf(" Can't add users, no cluster defined yet.\n" */ /* " Please contact your administrator.\n"); */ @@ -382,7 +383,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) if(!qos_list) { qos_list = acct_storage_g_get_qos( - db_conn, NULL); + db_conn, my_uid, NULL); } addto_qos_char_list(add_qos_list, qos_list, @@ -408,7 +409,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) user_cond.assoc_cond = assoc_cond; local_user_list = acct_storage_g_get_users( - db_conn, &user_cond); + db_conn, my_uid, &user_cond); } @@ -433,7 +434,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) account_cond.assoc_cond = assoc_cond; local_acct_list = acct_storage_g_get_accounts( - db_conn, &account_cond); + db_conn, my_uid, &account_cond); } @@ -451,7 +452,8 @@ extern int sacctmgr_add_user(int argc, char *argv[]) List cluster_list = NULL; acct_cluster_rec_t *cluster_rec = NULL; - cluster_list = acct_storage_g_get_clusters(db_conn, NULL); + cluster_list = acct_storage_g_get_clusters(db_conn, + my_uid, NULL); if(!cluster_list) { exit_code=1; fprintf(stderr, @@ -497,12 +499,13 @@ extern int sacctmgr_add_user(int argc, char *argv[]) query_assoc_cond.acct_list = assoc_cond->acct_list; query_assoc_cond.cluster_list = assoc_cond->cluster_list; local_assoc_list = acct_storage_g_get_associations( - db_conn, &query_assoc_cond); + db_conn, my_uid, &query_assoc_cond); itr = list_iterator_create(assoc_cond->user_list); while((name = list_next(itr))) { user = NULL; if(!sacctmgr_find_user_from_list(local_user_list, name)) { + uid_t pw_uid; if(!default_acct) { exit_code=1; fprintf(stderr, " Need a default account for " @@ -523,6 +526,22 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } first = 0; } + pw_uid = uid_from_string(name); + if(pw_uid == (uid_t) -1) { + char *warning = xstrdup_printf( + "There is no uid for user '%s'" + "\nAre you sure you want to continue?", + name); + + if(!commit_check(warning)) { + xfree(warning); + rc = SLURM_ERROR; + list_flush(user_list); + goto no_default; + } + xfree(warning); + } + user = xmalloc(sizeof(acct_user_rec_t)); user->assoc_list = list_create(NULL); user->name = xstrdup(name); @@ -542,6 +561,7 @@ extern int sacctmgr_add_user(int argc, char *argv[]) } user->admin_level = admin_level; + xstrfmtcat(user_str, " %s\n", name); list_append(user_list, user); @@ -1018,7 +1038,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) return SLURM_ERROR; } - user_list = acct_storage_g_get_users(db_conn, user_cond); + user_list = acct_storage_g_get_users(db_conn, my_uid, user_cond); destroy_acct_user_cond(user_cond); if(!user_list) { @@ -1132,6 +1152,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( @@ -1146,6 +1167,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( @@ -1268,6 +1290,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( 
db_conn, + my_uid, NULL); } field->print_routine( @@ -1280,6 +1303,7 @@ extern int sacctmgr_list_user(int argc, char *argv[]) qos_list = acct_storage_g_get_qos( db_conn, + my_uid, NULL); } field->print_routine( diff --git a/src/salloc/Makefile.am b/src/salloc/Makefile.am index e2da3019f..05fb0f3eb 100644 --- a/src/salloc/Makefile.am +++ b/src/salloc/Makefile.am @@ -3,7 +3,7 @@ AUTOMAKE_OPTIONS = foreign CLEANFILES = core.* -INCLUDES = -I$(top_srcdir) +INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) bin_PROGRAMS = salloc diff --git a/src/salloc/Makefile.in b/src/salloc/Makefile.in index 91ae04a40..03315a66a 100644 --- a/src/salloc/Makefile.in +++ b/src/salloc/Makefile.in @@ -263,7 +263,7 @@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ AUTOMAKE_OPTIONS = foreign CLEANFILES = core.* -INCLUDES = -I$(top_srcdir) +INCLUDES = -I$(top_srcdir) $(BG_INCLUDES) salloc_SOURCES = salloc.c salloc.h opt.c opt.h convenience_libs = $(top_builddir)/src/api/libslurm.o -ldl salloc_LDADD = \ diff --git a/src/salloc/opt.c b/src/salloc/opt.c index a46937536..6af234e29 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -971,6 +971,20 @@ static void _opt_args(int argc, char **argv) exit(1); } +/* _get_shell - return a string containing the default shell for this user + * NOTE: This function is NOT reentrant (see getpwuid_r if needed) */ +static char *_get_shell(void) +{ + struct passwd *pw_ent_ptr; + + pw_ent_ptr = getpwuid(opt.uid); + if (!pw_ent_ptr) { + pw_ent_ptr = getpwnam("nobody"); + error("warning - no user information for user %d", opt.uid); + } + return pw_ent_ptr->pw_shell; +} + /* * _opt_verify : perform some post option processing verification * @@ -991,8 +1005,11 @@ static bool _opt_verify(void) opt.job_name = base_name(command_argv[0]); if ((opt.no_shell == false) && (command_argc == 0)) { - error("A local command is a required parameter!"); - verified = false; + /* Using default shell as the user command */ + command_argc = 1; + command_argv = (char **) xmalloc(sizeof(char *) * 2); + command_argv[0] = _get_shell(); + command_argv[1] = NULL; } @@ -1351,7 +1368,7 @@ static void _usage(void) " [--bell] [--no-bell] [--kill-command[=signal]]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" " [--network=type] [--mem-per-cpu=MB]\n" -" executable [args...]\n"); +" [executable [args...]]\n"); } static void _help(void) @@ -1359,7 +1376,7 @@ static void _help(void) slurm_ctl_conf_t *conf; printf ( -"Usage: salloc [OPTIONS...] executable [args...]\n" +"Usage: salloc [OPTIONS...] 
[executable [args...]]\n" "\n" "Parallel run options:\n" " -N, --nodes=N number of nodes on which to run (N = min[-max])\n" diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index de913220f..c1a482583 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -50,6 +50,14 @@ #include "src/salloc/salloc.h" #include "src/salloc/opt.h" +#ifdef HAVE_BG +#include "src/api/job_info.h" +#include "src/api/node_select_info.h" +#include "src/common/node_select.h" +#include "src/plugins/select/bluegene/plugin/bg_boot_time.h" +#include "src/plugins/select/bluegene/wrap_rm_api.h" +#endif + #define MAX_RETRIES 3 char **command_argv; @@ -76,6 +84,16 @@ static void _user_msg_handler(srun_user_msg_t *msg); static void _ping_handler(srun_ping_msg_t *msg); static void _node_fail_handler(srun_node_fail_msg_t *msg); +#ifdef HAVE_BG + +#define POLL_SLEEP 3 /* retry interval in seconds */ + +static int _wait_bluegene_block_ready( + resource_allocation_response_msg_t *alloc); +static int _blocks_dealloc(); +#endif + + int main(int argc, char *argv[]) { log_options_t logopt = LOG_OPTS_STDERR_ONLY; @@ -187,6 +205,13 @@ int main(int argc, char *argv[]) * Allocation granted! */ info("Granted job allocation %d", alloc->job_id); +#ifdef HAVE_BG + if (!_wait_bluegene_block_ready(alloc)) { + error("Something is wrong with the boot of the block."); + goto relinquish; + } + +#endif if (opt.bell == BELL_ALWAYS || (opt.bell == BELL_AFTER_DELAY && ((after - before) > DEFAULT_BELL_DELAY))) { @@ -520,3 +545,93 @@ static void _node_fail_handler(srun_node_fail_msg_t *msg) { error("Node failure on %s", msg->nodelist); } + +#ifdef HAVE_BG +/* returns 1 if job and nodes are ready for job to begin, 0 otherwise */ +static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc) +{ + int is_ready = 0, i, rc; + char *block_id = NULL; + int cur_delay = 0; + int max_delay = BG_FREE_PREVIOUS_BLOCK + BG_MIN_BLOCK_BOOT + + (BG_INCR_BLOCK_BOOT * alloc->node_cnt); + + select_g_get_jobinfo(alloc->select_jobinfo, SELECT_DATA_BLOCK_ID, + &block_id); + + for (i=0; (cur_delay < max_delay); i++) { + if(i == 1) + info("Waiting for block %s to become ready for job", + block_id); + if (i) { + sleep(POLL_SLEEP); + rc = _blocks_dealloc(); + if ((rc == 0) || (rc == -1)) + cur_delay += POLL_SLEEP; + debug("still waiting"); + } + + rc = slurm_job_node_ready(alloc->job_id); + + if (rc == READY_JOB_FATAL) + break; /* fatal error */ + if (rc == READY_JOB_ERROR) /* error */ + continue; /* retry */ + if ((rc & READY_JOB_STATE) == 0) /* job killed */ + break; + if (rc & READY_NODE_STATE) { /* job and node ready */ + is_ready = 1; + break; + } + } + + if (is_ready) + info("Block %s is ready for job", block_id); + else + error("Block %s still not ready", block_id); + xfree(block_id); + + return is_ready; +} + +/* + * Test if any BG blocks are in deallocating state since they are + * probably related to this job we will want to sleep longer + * RET 1: deallocate in progress + * 0: no deallocate in progress + * -1: error occurred + */ +static int _blocks_dealloc() +{ + static node_select_info_msg_t *bg_info_ptr = NULL, *new_bg_ptr = NULL; + int rc = 0, error_code = 0, i; + + if (bg_info_ptr) { + error_code = slurm_load_node_select(bg_info_ptr->last_update, + &new_bg_ptr); + if (error_code == SLURM_SUCCESS) + select_g_free_node_info(&bg_info_ptr); + else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) { + error_code = SLURM_SUCCESS; + new_bg_ptr = bg_info_ptr; + } + } else { + error_code = slurm_load_node_select((time_t) NULL, &new_bg_ptr); 
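+		/*
+		 * Polling note: this branch (no cached data yet) passes a
+		 * null timestamp to fetch a full snapshot of the block
+		 * records; the branch above passes the cached
+		 * bg_info_ptr->last_update so the controller can answer
+		 * SLURM_NO_CHANGE_IN_DATA and the old snapshot is reused
+		 * rather than re-fetched and re-allocated on every poll.
+		 */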
+ } + + if (error_code) { + error("slurm_load_partitions: %s\n", + slurm_strerror(slurm_get_errno())); + return -1; + } + for (i=0; i<new_bg_ptr->record_count; i++) { + if(new_bg_ptr->bg_info_array[i].state + == RM_PARTITION_DEALLOCATING) { + rc = 1; + break; + } + } + bg_info_ptr = new_bg_ptr; + return rc; +} +#endif diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index ceaf73c7f..31d8f7134 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * sbatch.c - Submit a SLURM batch script. * - * $Id: sbatch.c 14812 2008-08-19 00:10:09Z jette $ + * $Id: sbatch.c 14958 2008-09-03 17:27:21Z jette $ ***************************************************************************** * Copyright (C) 2006-2007 The Regents of the University of California. * Copyright (C) 2008 Lawrence Livermore National Security. @@ -346,6 +346,24 @@ static bool contains_null_char(const void *buf, int size) return false; } +/* + * Checks if the buffer contains any DOS linebreak (\r\n). + */ +static bool contains_dos_linebreak(const void *buf, int size) +{ + char *str = (char *)buf; + char prev_char = '\0'; + int i; + + for (i = 0; i < size; i++) { + if (prev_char == '\r' && str[i] == '\n') + return true; + prev_char = str[i]; + } + + return false; +} + /* * If "filename" is NULL, the batch script is read from standard input. */ @@ -409,6 +427,10 @@ static void *get_script_buffer(const char *filename, int *size) error("The SLURM controller does not allow scripts that"); error("contain a NULL character '\\0'."); goto fail; + } else if (contains_dos_linebreak(buf, script_size)) { + error("Batch script contains DOS line breaks (\\r\\n)"); + error("instead of expected UNIX line breaks (\\n)."); + goto fail; } *size = script_size; diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index a91878257..387091e15 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -49,6 +49,7 @@ int exit_flag; /* program to terminate if =1 */ int input_words; /* number of words of input permitted */ int one_liner; /* one record per line if =1 */ int quiet_flag; /* quiet=1, verbose=-1, normal=0 */ +int verbosity; /* count of "-v" options */ static void _delete_it (int argc, char *argv[]); static int _get_command (int *argc, char *argv[]); @@ -90,6 +91,7 @@ main (int argc, char *argv[]) exit_flag = 0; input_field_count = 0; quiet_flag = 0; + verbosity = 0; log_init("scontrol", opts, SYSLOG_FACILITY_DAEMON, NULL); if (getenv ("SCONTROL_ALL")) @@ -99,7 +101,8 @@ main (int argc, char *argv[]) long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': - fprintf(stderr, "Try \"scontrol --help\" for more information\n"); + fprintf(stderr, "Try \"scontrol --help\" for " + "more information\n"); exit(1); break; case (int)'a': @@ -120,6 +123,7 @@ main (int argc, char *argv[]) break; case (int)'v': quiet_flag = -1; + verbosity++; break; case (int)'V': _print_version(); @@ -127,11 +131,17 @@ main (int argc, char *argv[]) break; default: exit_code = 1; - fprintf(stderr, "getopt error, returned %c\n", opt_char); + fprintf(stderr, "getopt error, returned %c\n", + opt_char); exit(exit_code); } } + if (verbosity) { + opts.stderr_level += verbosity; + log_alter(opts, SYSLOG_FACILITY_USER, NULL); + } + if (argc > MAX_INPUT_FIELDS) /* bogus input, but continue anyway */ input_words = argc; else diff --git a/src/sinfo/opts.c b/src/sinfo/opts.c index 1313dfbb6..58cb1aba6 100644 --- a/src/sinfo/opts.c +++ b/src/sinfo/opts.c 
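The DOS-linebreak rejection added to sbatch above matters because of how execve() treats shebang lines: a script beginning "#!/bin/sh\r\n" makes the kernel look for an interpreter literally named "/bin/sh\r", which does not exist, so the job dies at launch with a confusing ENOENT. A small self-contained illustration of that failure mode (this program and the path it writes are illustrative, not part of the patch):

/* Demonstrates why scripts with DOS line endings cannot be
 * exec'd: the trailing '\r' becomes part of the interpreter path. */
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/tmp/dos_test.sh";	/* hypothetical path */
	FILE *fp = fopen(path, "w");

	if (!fp)
		return 1;
	fputs("#!/bin/sh\r\necho hello\r\n", fp);	/* DOS linebreaks */
	fclose(fp);
	chmod(path, 0755);
	execl(path, path, (char *) NULL);
	perror("execl");	/* typically ENOENT: no "/bin/sh\r" */
	return 1;
}

The execve() diagnostic added to src/slurmd/slurmstepd/task.c later in this patch reports exactly this case by echoing back the bad interpreter line.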
@@ -1,7 +1,8 @@ /****************************************************************************\ * opts.c - sinfo command line option processing functions ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Joey Ekstrom <ekstrom1@llnl.gov>, Morris Jette <jette1@llnl.gov> * LLNL-CODE-402394. @@ -122,7 +123,8 @@ extern void parse_command_line(int argc, char *argv[]) long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': - fprintf(stderr, "Try \"sinfo --help\" for more information\n"); + fprintf(stderr, + "Try \"sinfo --help\" for more information\n"); exit(1); break; case (int)'a': diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c index 13d797714..7cef6e0c7 100644 --- a/src/sinfo/sinfo.c +++ b/src/sinfo/sinfo.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * sinfo.c - Report overall state the system - * - * $Id: sinfo.c 14203 2008-06-06 16:58:44Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Joey Ekstrom <ekstrom1@llnl.gov>, Morris Jette <jette1@llnl.gov> * LLNL-CODE-402394. diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 3c1959ae9..e11631ae8 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -708,7 +708,9 @@ static void *_slurmctld_rpc_mgr(void *no_data) { slurm_fd newsockfd; slurm_fd sockfd; - slurm_addr cli_addr; + slurm_addr cli_addr, srv_addr; + uint16_t port; + char ip[32]; pthread_t thread_id_rpc_req; pthread_attr_t thread_attr_rpc_req; int no_thread; @@ -735,6 +737,9 @@ static void *_slurmctld_rpc_mgr(void *no_data) == SLURM_SOCKET_ERROR) fatal("slurm_init_msg_engine_port error %m"); unlock_slurmctld(config_read_lock); + slurm_get_stream_addr(sockfd, &srv_addr); + slurm_get_ip_str(&srv_addr, &port, ip, sizeof(ip)); + debug2("slurmctld listening on %s:%d", ip, ntohs(port)); /* Prepare to catch SIGUSR1 to interrupt accept(). * This signal is generated by the slurmctld signal @@ -1171,6 +1176,7 @@ static void *_slurmctld_background(void *no_data) _accounting_cluster_ready(); unlock_slurmctld(node_read_lock); } + /* Reassert this machine as the primary controller. 
* A network or security problem could result in * the backup controller assuming control even @@ -1369,7 +1375,6 @@ static int _shutdown_backup_controller(int wait_time) { int rc; slurm_msg_t req; - DEF_TIMERS; slurm_msg_t_init(&req); if ((slurmctld_conf.backup_addr == NULL) || @@ -1384,11 +1389,9 @@ static int _shutdown_backup_controller(int wait_time) /* send request message */ req.msg_type = REQUEST_CONTROL; - START_TIMER; if (slurm_send_recv_rc_msg_only_one(&req, &rc, (CONTROL_TIMEOUT * 1000)) < 0) { - END_TIMER2("_shutdown_backup_controller"); - error("_shutdown_backup_controller:send/recv: %m, %s", TIME_STR); + error("_shutdown_backup_controller:send/recv: %m"); return SLURM_ERROR; } if (rc == ESLURM_DISABLED) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index f8c3c7d30..d0d2dee8a 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3177,20 +3177,23 @@ static int _list_find_job_id(void *job_entry, void *key) */ static int _list_find_job_old(void *job_entry, void *key) { - time_t now = time(NULL); - time_t kill_age = now - (slurmctld_conf.kill_wait + 20); - time_t min_age = now - slurmctld_conf.min_job_age; + time_t kill_age, min_age, now = time(NULL);; struct job_record *job_ptr = (struct job_record *)job_entry; - if ( (job_ptr->job_state & JOB_COMPLETING) && - (job_ptr->end_time < kill_age) ) { - re_kill_job(job_ptr); + if (job_ptr->job_state & JOB_COMPLETING) { + kill_age = now - (slurmctld_conf.kill_wait + + 2 * slurm_get_msg_timeout()); + if (job_ptr->time_last_active < kill_age) { + job_ptr->time_last_active = now; + re_kill_job(job_ptr); + } return 0; /* Job still completing */ } if (slurmctld_conf.min_job_age == 0) return 0; /* No job record purging */ + min_age = now - slurmctld_conf.min_job_age; if (job_ptr->end_time > min_age) return 0; /* Too new to purge */ @@ -4276,53 +4279,24 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } if (job_specs->account) { - if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) { - info("update_job: attempt to modify account for " - "non-pending job_id %u", job_specs->job_id); - error_code = ESLURM_DISABLED; - } else { - acct_association_rec_t assoc_rec, *assoc_ptr; - bzero(&assoc_rec, sizeof(acct_association_rec_t)); - - assoc_rec.uid = job_ptr->user_id; - assoc_rec.partition = job_ptr->partition; - assoc_rec.acct = job_specs->account; - if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, - accounting_enforce, - &assoc_ptr)) { - info("job_update: invalid account %s for " - "job_id %u", - job_specs->account, job_ptr->job_id); - error_code = ESLURM_INVALID_ACCOUNT; - } else { - xfree(job_ptr->account); - if (assoc_rec.acct[0] != '\0') { - job_ptr->account = xstrdup(assoc_rec.acct); - info("update_job: setting account to " - "%s for job_id %u", - assoc_rec.acct, job_ptr->job_id); - } else { - info("update_job: cleared account for " - "job_id %u", - job_specs->job_id); - } - job_ptr->assoc_id = assoc_rec.id; - job_ptr->assoc_ptr = (void *) assoc_ptr; - } - } + int rc = update_job_account("update_job", job_ptr, + job_specs->account); + if (rc != SLURM_SUCCESS) + error_code = rc; } if (job_specs->ntasks_per_node != (uint16_t) NO_VAL) { if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) error_code = ESLURM_DISABLED; else if (super_user) { - detail_ptr->ntasks_per_node = job_specs->ntasks_per_node; + detail_ptr->ntasks_per_node = job_specs-> + ntasks_per_node; info("update_job: setting ntasks_per_node to %u for " "job_id %u", job_specs->ntasks_per_node, job_specs->job_id); } else { - error("Not 
super user: setting ntasks_oper_node to job %u", - job_specs->job_id); + error("Not super user: setting ntasks_per_node to " + "job %u", job_specs->job_id); error_code = ESLURM_ACCESS_DENIED; } } @@ -5631,7 +5605,8 @@ extern int job_cancel_by_assoc_id(uint32_t assoc_id) if ((job_ptr->assoc_id != assoc_id) || IS_JOB_FINISHED(job_ptr)) continue; - info("Association deleted, cancelling job %u", job_ptr->job_id); + info("Association deleted, cancelling job %u", + job_ptr->job_id); job_signal(job_ptr->job_id, SIGKILL, 0, 0); job_ptr->state_reason = FAIL_BANK_ACCOUNT; cnt++; @@ -5639,3 +5614,55 @@ extern int job_cancel_by_assoc_id(uint32_t assoc_id) list_iterator_destroy(job_iterator); return cnt; } + +/* + * Modify the account associated with a pending job + * IN module - where this is called from + * IN job_ptr - pointer to job which should be modified + * IN new_account - desired account name + * RET SLURM_SUCCESS or error code + */ +extern int update_job_account(char *module, struct job_record *job_ptr, + char *new_account) +{ + acct_association_rec_t assoc_rec, *assoc_ptr; + + if ((!IS_JOB_PENDING(job_ptr)) || (job_ptr->details == NULL)) { + info("%s: attempt to modify account for non-pending " + "job_id %u", module, job_ptr->job_id); + return ESLURM_DISABLED; + } + + + bzero(&assoc_rec, sizeof(acct_association_rec_t)); + assoc_rec.uid = job_ptr->user_id; + assoc_rec.partition = job_ptr->partition; + assoc_rec.acct = new_account; + if (assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, + accounting_enforce, &assoc_ptr)) { + info("%s: invalid account %s for job_id %u", + module, new_account, job_ptr->job_id); + return ESLURM_INVALID_ACCOUNT; + } + + + xfree(job_ptr->account); + if (assoc_rec.acct[0] != '\0') { + job_ptr->account = xstrdup(assoc_rec.acct); + info("%s: setting account to %s for job_id %u", + module, assoc_rec.acct, job_ptr->job_id); + } else { + info("%s: cleared account for job_id %u", + module, job_ptr->job_id); + } + job_ptr->assoc_id = assoc_rec.id; + job_ptr->assoc_ptr = (void *) assoc_ptr; + + if (job_ptr->details && job_ptr->details->begin_time) { + /* Update account associated with the eligible time */ + jobacct_storage_g_job_start(acct_db_conn, job_ptr); + } + last_job_update = time(NULL); + + return SLURM_SUCCESS; +} diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index e9120c332..6392dace2 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -556,12 +556,11 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, error_code = select_g_get_info_from_plugin ( SELECT_BITMAP, &partially_idle_node_bitmap); + if (error_code != SLURM_SUCCESS) { + FREE_NULL_BITMAP(partially_idle_node_bitmap); + return error_code; + } } - - if (error_code != SLURM_SUCCESS) { - FREE_NULL_BITMAP(partially_idle_node_bitmap); - return error_code; - } } if (job_ptr->details->req_node_bitmap) { /* specific nodes required */ diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 20f05fba9..0fe112c08 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -963,7 +963,7 @@ extern int job_req_node_filter(struct job_record *job_ptr, * job_requeue - Requeue a running or pending batch job * IN uid - user id of user issuing the RPC * IN job_id - id of the job to be requeued - * IN conn_fd - file descriptor on which to send reply + * IN conn_fd - file descriptor on which to send reply, -1 if none * RET 0 on success, otherwise ESLURM error code */ extern int job_requeue (uid_t uid, uint32_t job_id, 
slurm_fd conn_fd); @@ -1433,6 +1433,16 @@ extern int sync_job_files(void); */ extern int update_job (job_desc_msg_t * job_specs, uid_t uid); +/* + * Modify the account associated with a pending job + * IN module - where this is called from + * IN job_ptr - pointer to job which should be modified + * IN new_account - desired account name + * RET SLURM_SUCCESS or error code + */ +extern int update_job_account(char *module, struct job_record *job_ptr, + char *new_account); + /* Reset nodes_completing field for all jobs */ extern void update_job_nodes_completing(void); diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 087e0bcc8..a16c21901 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -40,15 +40,17 @@ # include "config.h" #endif -#include <sys/wait.h> -#include <sys/stat.h> -#include <sys/param.h> -#include <unistd.h> -#include <pwd.h> -#include <grp.h> -#include <string.h> #include <assert.h> #include <ctype.h> +#include <fcntl.h> +#include <grp.h> +#include <pwd.h> +#include <string.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> #if HAVE_STDLIB_H # include <stdlib.h> @@ -294,7 +296,7 @@ exec_task(slurmd_job_t *job, int i, int waitfd) { char c; uint32_t *gtids; /* pointer to arrary of ranks */ - int j; + int fd, j; int rc; slurmd_task_info_t *task = job->task[i]; @@ -430,6 +432,21 @@ exec_task(slurmd_job_t *job, int i, int waitfd) /* * print error message and clean up if execve() returns: */ + if ((errno == ENOENT) && + ((fd = open(task->argv[0], O_RDONLY)) >= 0)) { + char buf[256], *eol; + int sz; + sz = read(fd, buf, sizeof(buf)); + if ((sz >= 3) && (strncmp(buf, "#!", 2) == 0)) { + eol = strchr(buf, '\n'); + if (eol) + eol[0] = '\0'; + else + buf[sizeof(buf)-1] = '\0'; + error("execve(): bad interpreter(%s): %m", buf+2); + exit(errno); + } + } error("execve(): %s: %m", task->argv[0]); exit(errno); } diff --git a/src/slurmdbd/proc_req.c b/src/slurmdbd/proc_req.c index 089171f22..cad5fbb1a 100644 --- a/src/slurmdbd/proc_req.c +++ b/src/slurmdbd/proc_req.c @@ -679,7 +679,8 @@ static int _get_accounts(void *db_conn, return SLURM_ERROR; } - list_msg.my_list = acct_storage_g_get_accounts(db_conn, get_msg->cond); + list_msg.my_list = acct_storage_g_get_accounts(db_conn, *uid, + get_msg->cond); slurmdbd_free_cond_msg(DBD_GET_ACCOUNTS, get_msg); @@ -710,7 +711,7 @@ static int _get_assocs(void *db_conn, } list_msg.my_list = acct_storage_g_get_associations( - db_conn, get_msg->cond); + db_conn, *uid, get_msg->cond); slurmdbd_free_cond_msg(DBD_GET_ASSOCS, get_msg); @@ -741,7 +742,7 @@ static int _get_clusters(void *db_conn, } list_msg.my_list = acct_storage_g_get_clusters( - db_conn, get_msg->cond); + db_conn, *uid, get_msg->cond); slurmdbd_free_cond_msg(DBD_GET_CLUSTERS, get_msg); @@ -787,7 +788,7 @@ static int _get_jobs(void *db_conn, } list_msg.my_list = jobacct_storage_g_get_jobs( - db_conn, + db_conn, *uid, get_jobs_msg->selected_steps, get_jobs_msg->selected_parts, &sacct_params); slurmdbd_free_get_jobs_msg(get_jobs_msg); @@ -822,7 +823,7 @@ static int _get_jobs_cond(void *db_conn, } list_msg.my_list = jobacct_storage_g_get_jobs_cond( - db_conn, cond_msg->cond); + db_conn, *uid, cond_msg->cond); slurmdbd_free_cond_msg(DBD_GET_JOBS_COND, cond_msg); *out_buffer = init_buf(1024); @@ -851,7 +852,8 @@ static int _get_qos(void *db_conn, return SLURM_ERROR; } - list_msg.my_list = acct_storage_g_get_qos(db_conn, cond_msg->cond); + list_msg.my_list = 
acct_storage_g_get_qos(db_conn, *uid, + cond_msg->cond); slurmdbd_free_cond_msg(DBD_GET_QOS, cond_msg); *out_buffer = init_buf(1024); @@ -880,7 +882,8 @@ static int _get_txn(void *db_conn, return SLURM_ERROR; } - list_msg.my_list = acct_storage_g_get_txn(db_conn, cond_msg->cond); + list_msg.my_list = acct_storage_g_get_txn(db_conn, *uid, + cond_msg->cond); slurmdbd_free_cond_msg(DBD_GET_TXN, cond_msg); *out_buffer = init_buf(1024); @@ -898,7 +901,7 @@ static int _get_usage(uint16_t type, void *db_conn, dbd_usage_msg_t *get_msg = NULL; dbd_usage_msg_t got_msg; uint16_t ret_type = 0; - int (*my_function) (void *db_conn, void *object, + int (*my_function) (void *db_conn, uid_t uid, void *object, time_t start, time_t end); int rc = SLURM_SUCCESS; char *comment = NULL; @@ -928,7 +931,7 @@ static int _get_usage(uint16_t type, void *db_conn, return SLURM_ERROR; } - rc = (*(my_function))(db_conn, get_msg->rec, + rc = (*(my_function))(db_conn, *uid, get_msg->rec, get_msg->start, get_msg->end); slurmdbd_free_usage_msg(type, get_msg); @@ -967,7 +970,8 @@ static int _get_users(void *db_conn, return SLURM_ERROR; } - list_msg.my_list = acct_storage_g_get_users(db_conn, get_msg->cond); + list_msg.my_list = acct_storage_g_get_users(db_conn, *uid, + get_msg->cond); slurmdbd_free_cond_msg(DBD_GET_USERS, get_msg); *out_buffer = init_buf(1024); diff --git a/src/slurmdbd/read_config.c b/src/slurmdbd/read_config.c index 83f1c5873..aef0f2b99 100644 --- a/src/slurmdbd/read_config.c +++ b/src/slurmdbd/read_config.c @@ -87,6 +87,7 @@ static void _clear_slurmdbd_conf(void) xfree(slurmdbd_conf->log_file); xfree(slurmdbd_conf->pid_file); xfree(slurmdbd_conf->plugindir); + slurmdbd_conf->private_data = 0; xfree(slurmdbd_conf->slurm_user_name); slurmdbd_conf->step_purge = 0; xfree(slurmdbd_conf->storage_host); @@ -120,6 +121,7 @@ extern int read_slurmdbd_conf(void) {"MessageTimeout", S_P_UINT16}, {"PidFile", S_P_STRING}, {"PluginDir", S_P_STRING}, + {"PrivateData", S_P_STRING}, {"SlurmUser", S_P_STRING}, {"StepPurge", S_P_UINT16}, {"StorageHost", S_P_STRING}, @@ -129,8 +131,9 @@ extern int read_slurmdbd_conf(void) {"StorageType", S_P_STRING}, {"StorageUser", S_P_STRING}, {NULL} }; - s_p_hashtbl_t *tbl; - char *conf_path; + s_p_hashtbl_t *tbl = NULL; + char *conf_path = NULL; + char *temp_str = NULL; struct stat buf; /* Set initial values */ @@ -174,6 +177,30 @@ extern int read_slurmdbd_conf(void) } s_p_get_string(&slurmdbd_conf->pid_file, "PidFile", tbl); s_p_get_string(&slurmdbd_conf->plugindir, "PluginDir", tbl); + if (s_p_get_string(&temp_str, "PrivateData", tbl)) { + if (strstr(temp_str, "job")) + slurmdbd_conf->private_data + |= PRIVATE_DATA_JOBS; + if (strstr(temp_str, "node")) + slurmdbd_conf->private_data + |= PRIVATE_DATA_NODES; + if (strstr(temp_str, "partition")) + slurmdbd_conf->private_data + |= PRIVATE_DATA_PARTITIONS; + if (strstr(temp_str, "usage")) + slurmdbd_conf->private_data + |= PRIVATE_DATA_USAGE; + if (strstr(temp_str, "users")) + slurmdbd_conf->private_data + |= PRIVATE_DATA_USERS; + if (strstr(temp_str, "accounts")) + slurmdbd_conf->private_data + |= PRIVATE_DATA_ACCOUNTS; + if (strstr(temp_str, "all")) + slurmdbd_conf->private_data = 0xffff; + xfree(temp_str); + } + s_p_get_string(&slurmdbd_conf->slurm_user_name, "SlurmUser", tbl); if (!s_p_get_uint16(&slurmdbd_conf->step_purge, "StepPurge", @@ -232,6 +259,8 @@ extern int read_slurmdbd_conf(void) /* Log the current configuration using verbose() */ extern void log_config(void) { + char tmp_str[128]; + if (slurmdbd_conf->archive_age) { 
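	/*
	 * The PrivateData value parsed above in read_slurmdbd_conf() is
	 * a bit mask built by ORing one flag per keyword found in a
	 * comma separated slurmdbd.conf entry, e.g. a line such as
	 *	PrivateData=users,usage,accounts
	 * sets PRIVATE_DATA_USERS, PRIVATE_DATA_USAGE and
	 * PRIVATE_DATA_ACCOUNTS, while "all" sets every flag (0xffff).
	 * private_data_string() formats that mask for the log below.
	 */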
debug2("ArchiveAge = %u days", slurmdbd_conf->archive_age); @@ -249,6 +278,11 @@ extern void log_config(void) debug2("MessageTimeout = %u", slurmdbd_conf->msg_timeout); debug2("PidFile = %s", slurmdbd_conf->pid_file); debug2("PluginDir = %s", slurmdbd_conf->plugindir); + + private_data_string(slurmdbd_conf->private_data, + tmp_str, sizeof(tmp_str)); + + debug2("PrivateData = %s", tmp_str); debug2("SlurmUser = %s(%u)", slurmdbd_conf->slurm_user_name, slurmdbd_conf->slurm_user_id); debug2("StepPurge = %u days", slurmdbd_conf->step_purge); diff --git a/src/slurmdbd/read_config.h b/src/slurmdbd/read_config.h index 8c5895252..5a5077ab4 100644 --- a/src/slurmdbd/read_config.h +++ b/src/slurmdbd/read_config.h @@ -75,6 +75,7 @@ typedef struct slurm_dbd_conf { uint16_t msg_timeout; /* message timeout */ char * pid_file; /* where to store current PID */ char * plugindir; /* dir to look for plugins */ + uint16_t private_data; /* restrict information */ uint32_t slurm_user_id; /* uid of slurm_user_name */ char * slurm_user_name;/* user that slurmcdtld runs as */ uint16_t step_purge; /* purge time for step info */ diff --git a/src/sreport/cluster_reports.c b/src/sreport/cluster_reports.c index 752eb59e7..c28956716 100644 --- a/src/sreport/cluster_reports.c +++ b/src/sreport/cluster_reports.c @@ -220,7 +220,8 @@ static List _get_cluster_list(int argc, char *argv[], uint32_t *total_time, _set_cond(&i, argc, argv, cluster_cond, format_list); - cluster_list = acct_storage_g_get_clusters(db_conn, cluster_cond); + cluster_list = acct_storage_g_get_clusters(db_conn, my_uid, + cluster_cond); if(!cluster_list) { exit_code=1; fprintf(stderr, " Problem with cluster query.\n"); diff --git a/src/sreport/job_reports.c b/src/sreport/job_reports.c index d8d024530..56f31907a 100644 --- a/src/sreport/job_reports.c +++ b/src/sreport/job_reports.c @@ -504,7 +504,7 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) _setup_grouping_print_fields_list(grouping_list); - job_list = jobacct_storage_g_get_jobs_cond(db_conn, job_cond); + job_list = jobacct_storage_g_get_jobs_cond(db_conn, my_uid, job_cond); if(!job_list) { exit_code=1; fprintf(stderr, " Problem with job query.\n"); @@ -518,7 +518,8 @@ extern int job_sizes_grouped_by_top_acct(int argc, char *argv[]) assoc_cond.partition_list = job_cond->partition_list; assoc_cond.parent_acct = "root"; - assoc_list = acct_storage_g_get_associations(db_conn, &assoc_cond); + assoc_list = acct_storage_g_get_associations(db_conn, my_uid, + &assoc_cond); if(print_fields_have_header) { char start_char[20]; diff --git a/src/sreport/user_reports.c b/src/sreport/user_reports.c index 4e2347787..168a631ab 100644 --- a/src/sreport/user_reports.c +++ b/src/sreport/user_reports.c @@ -294,7 +294,7 @@ extern int user_top(int argc, char *argv[]) _setup_print_fields_list(format_list); list_destroy(format_list); - user_list = acct_storage_g_get_users(db_conn, user_cond); + user_list = acct_storage_g_get_users(db_conn, my_uid, user_cond); if(!user_list) { exit_code=1; fprintf(stderr, " Problem with user query.\n"); diff --git a/src/sstat/options.c b/src/sstat/options.c index 137604372..64dceded8 100644 --- a/src/sstat/options.c +++ b/src/sstat/options.c @@ -296,13 +296,7 @@ void parse_command_line(int argc, char **argv) if(params.opt_field_list) xfree(params.opt_field_list); - params.opt_field_list = - xrealloc(params.opt_field_list, - (params.opt_field_list==NULL? 
0 : - strlen(params.opt_field_list)) + - strlen(optarg) + 1); - strcat(params.opt_field_list, optarg); - strcat(params.opt_field_list, ","); + xstrfmtcat(params.opt_field_list, "%s,", optarg); break; case 'h': params.opt_help = 1; @@ -370,13 +364,8 @@ void parse_command_line(int argc, char **argv) _addto_job_list(params.opt_job_list, optarg); } - if(!params.opt_field_list) { - params.opt_field_list = - xmalloc(sizeof(STAT_FIELDS)+1); - strcat(params.opt_field_list, STAT_FIELDS); - strcat(params.opt_field_list, ","); - } - + if(!params.opt_field_list) + xstrfmtcat(params.opt_field_list, "%s,", STAT_FIELDS); if (params.opt_verbose) { fprintf(stderr, "Options selected:\n" diff --git a/src/sview/grid.c b/src/sview/grid.c index 1ebdcc20e..c3c123fde 100644 --- a/src/sview/grid.c +++ b/src/sview/grid.c @@ -661,7 +661,7 @@ extern int setup_grid_table(GtkTable *table, List button_list, List node_list) ListIterator itr = NULL; sview_node_info_t *sview_node_info_ptr = NULL; #ifdef HAVE_BG - int y=0, z=0, x_offset=0, y_offset=0; + int y=0, z=0, x_offset=0, y_offset=0, default_y_offset=0; #endif if(!node_list) { @@ -693,14 +693,19 @@ extern int setup_grid_table(GtkTable *table, List button_list, List node_list) * get the nodes from the controller going up from the Z dim * instead of laying these out in a nice X fashion */ + + default_y_offset = (DIM_SIZE[Z] * DIM_SIZE[Y]) + + (DIM_SIZE[Y] - DIM_SIZE[Z]); + for (x=0; x<DIM_SIZE[X]; x++) { - y_offset = (DIM_SIZE[Z] * DIM_SIZE[Y]); + y_offset = default_y_offset; + for (y=0; y<DIM_SIZE[Y]; y++) { coord_y = y_offset - y; x_offset = DIM_SIZE[Z] - 1; for (z=0; z<DIM_SIZE[Z]; z++){ coord_x = x + x_offset; - + grid_button = xmalloc(sizeof(grid_button_t)); grid_button->inx = i++; grid_button->table = table; diff --git a/testsuite/expect/test21.15 b/testsuite/expect/test21.15 index 4cd661089..529e24a4c 100755 --- a/testsuite/expect/test21.15 +++ b/testsuite/expect/test21.15 @@ -1,7 +1,8 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# sacctmgr add an user +# sacctmgr add user +# # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -33,259 +34,743 @@ source ./globals set test_id "21.15" set exit_code 0 -set acmatches 0 -set aamatches 0 -set lmatches 0 -set damatches 0 -set dcmatches 0 -set dumatches 0 -set not_support 0 -set add add -set lis list -set del delete -set mod modify -set nams Names -set nam Name -set fs Fairshare -set mc MaxCPUSecs -set mj MaxJobs -set mn MaxNodes -set mw MaxWall -set clu cluster set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 set fs1 2500 -set fs2 2200 +set fs2 1700 +set fs3 1 set mc1 1000000 -set mc2 200000 +set mc2 700000 +set mc3 1 set mj1 50 -set mj2 20 +set mj2 70 +set mj3 1 set mn1 300 -set mn2 200 +set mn2 700 +set mn3 1 set mw1 01:00:00 -set mw2 00:20:00 +set mw2 00:07:00 +set mw3 00:01:00 +set clu cluster +set cl1 1tmach +set cl2 2tmach +set cl3 3tmach set acc account -set acs accounts -set ass associations +set acc account +set nams names set nm1 testaccta1 -set nm2 tacct2 +set nm2 testaccta2 +set nm3 testaccta3 set des Description -set ds1 "Test Account A1" -set ds1 "TestAccount2" +set ds1 testaccounta1 +set ds2 testacct set org Organization -set or1 "Account Org A1" -set or1 "AcctOrg2" +set or1 accountorga1 +set or2 acttrg set qs QosLevel set qs1 normal +set par parent set usr user set us1 tuser1 +set us2 tuser2 +set us3 tuser3 set al AdminLevel set aln None +set ala Administrator +set alo Operator set dac DefaultAccount -set par Partitions +set pts Partitions +set fs fairshare +set mc maxcpu +set mj maxjob +set mn maxnode +set mw maxwall set dbu debug set access_err 0 + print_header $test_id -if { 0 == 0 } { - send_user "\n This test is not ready\n" - exit 0 -} - +set timeout 60 + +# +# Check accounting config and bail if not found. +# if { [test_account_storage] == 0 } { send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" exit 0 } - + + # # Use sacctmgr to create a cluster -# -set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] -expect { - -re "privilege to preform this action" { - set access_err 1 - exp_continue +# +proc _add_cluster {name} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - -re "Adding Cluster" { - incr acmatches - exp_continue + + set my_pid [spawn $sacctmgr -i add cluster $name] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Cluster" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + exit_code 1 + } + eof { + wait + } } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $matches\n" + incr exit_code 1 } - eof { - wait + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 } -} -if {$access_err != 0} { - send_user "\nWARNING: not 
authorized to perform this test\n" - exit $exit_code -} -if {$acmatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters - got $acmatches\n" - set exit_code 1 + + return $exit_code } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +# +# Use sacctmgr to remove the test cluster +# +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to perform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code } # # Use sacctmgr to add an account # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] -expect { - -re "Adding Account" { - incr aamatches - exp_continue +proc _add_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i add account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Account" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $matches\n" + incr exit_code 1 } -} -if {$aamatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. - got $aamatches\n" - set exit_code 1 + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to add a user +# Use sacctmgr to remove an account # -set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$aln $clu=$tc1 $dac=$nm1 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us1 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm1" { - exp_continue +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + return $exit_code } # -# Use sacctmgr to list the test user +# Use sacctmgr to modify an account # -set as_list_pid [spawn $sacctmgr list $usr $nams=$us1 WithAssocs] -expect { - -re "$tc1 *$nm1 *$qs1 *None *$tc1 *$nm1 *$par *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - exp_continue +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" } - eof { - wait + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" } + + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" + } + + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 + } + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 + } + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 + } + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 + } + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 + } + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 + } + + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. 
got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } + # -# Use sacctmgr to delete the test user +# Use sacctmgr to add a user # -set sadel_pid [spawn $sacctmgr -i $del $usr $us1] -expect { - -re "Deleting users" { - incr dumatches - exp_continue +proc _add_user { account adminlevel cluster defaultaccount fs maxcpu maxjob maxnodes maxwall name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + + set command "$name" + + if { [string length $account] } { + set command "$command account=$account" } - eof { - wait + + if { [string length $adminlevel] } { + set command "$command adminlevel=$adminlevel" + } + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + } + + if { [string length $defaultaccount] } { + set command "$command defaultaccount=$defaultaccount" + } + + if { [string length $fs] } { + set command "$command fs=$fs" + } + + if { [string length $maxcpu] } { + set command "$command maxcpu=$maxcpu" + } + + if { [string length $maxjob] } { + set command "$command maxjob=$maxjob" + } + + if { [string length $maxnodes] } { + set command "$command maxnodes=$maxnodes" + } + + if { [string length $maxwall] } { + set command "$command maxwall=$maxwall" + } + + if { [string length $name] } { + set command "$command name=$name" } -} -if {$dumatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" - set exit_code 1 + set my_pid [eval spawn $sacctmgr -i add user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding User" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding user. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to delete the test account +# Use sacctmgr to remove an user # -set sadel_pid [spawn $sacctmgr -i $del $acc $nm1] -expect { - -re "Deleting account" { - incr damatches - exp_continue +proc _remove_user { cluster acct user } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting user" + + if { ![string length $user] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + + set command "$user" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting user associations" } - eof { - wait + + if { [string length $acct] } { + set command "$command account=$acct" + set check "Deleting user associations" } + + set my_pid [eval spawn $sacctmgr -i delete user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_user "" "" "$us1,$us2,$us3" +_remove_acct "" "$nm1,$nm2,$nm3" +_remove_cluster "$tc1,$tc2,$tc3" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code } -if {$damatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" - set exit_code 1 +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2,$tc3"] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +#add accounts +incr exit_code [_add_acct "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add users +# account adminlevel cluster defaultaccount fs maxcpu maxjob maxnodes maxwall name +incr exit_code [_add_user "$nm1,$nm2,$nm3" "$alo" "$tc1,$tc2,$tc3" "$nm2" "$fs1" "$mc1" "$mj1" "$mn1" "$mw1" $us1] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test cluster +# Use sacctmgr to list the test user additions # -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user format="User,Def,Admin,clus,acc,fair,maxc,maxj,maxn,maxw" names=$us1 withassoc] expect { - -re "Deleting clusters" { - incr dcmatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm2.$alo.($tc1|$tc2|$tc3).($nm1|$nm2|$nm3).$fs1.$mc1.$mj1.$mn1.$mw1.)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$dcmatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" - set exit_code 1 +if {$matches != 9} { + send_user "\nFAILURE: Account addition 1 incorrect with only $matches.\n" + incr exit_code 1 } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 -} +# This is the end below here + +incr exit_code [_remove_user "$tc1" "" "$us1,$us2,$us3"] +incr exit_code [_remove_user "" "$nm1,$nm2,$nm3" "$us1,$us2,$us3"] +incr exit_code [_remove_user "" "" "$us1,$us2,$us3"] +incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_cluster "$tc1,$tc2,$tc3"] if {$exit_code == 0} { send_user "\nSUCCESS\n" - } else { +} else { send_user "\nFAILURE\n" - } - +} exit $exit_code + diff --git a/testsuite/expect/test21.16 b/testsuite/expect/test21.16 index ddb8b5b20..5d318f87e 100755 --- a/testsuite/expect/test21.16 +++ b/testsuite/expect/test21.16 @@ -1,7 +1,8 @@ #!/usr/bin/expect ############################################################################ # Purpose: Test of SLURM functionality -# sacctmgr add and list multiple users +# sacctmgr add multiple users +# # # Output: "TEST: #.#" 
followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -33,331 +34,736 @@ source ./globals set test_id "21.16" set exit_code 0 -set acmatches 0 -set aamatches 0 -set aa2matches 0 -set lmatches 0 -set damatches 0 -set dcmatches 0 -set dumatches 0 -set dlumatches 0 -set not_support 0 -set add add -set lis list -set del delete -set mod modify -set nams Names -set nam Name -set fs Fairshare -set mc MaxCPUSecs -set mj MaxJobs -set mn MaxNodes -set mw MaxWall -set clu cluster -set tc1 tclus1 -set tc2 tclus2 -set tc3 tclus3 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 set fs1 2500 -set fs2 2200 +set fs2 1700 +set fs3 1 set mc1 1000000 -set mc2 200000 +set mc2 700000 +set mc3 1 set mj1 50 -set mj2 20 +set mj2 70 +set mj3 1 set mn1 300 -set mn2 200 +set mn2 700 +set mn3 1 set mw1 01:00:00 -set mw2 00:20:00 +set mw2 00:07:00 +set mw3 00:01:00 +set clu cluster +set cl1 1tmach +set cl2 2tmach +set cl3 3tmach set acc account -set acs accounts -set ass associations +set acc account +set nams names set nm1 testaccta1 -set nm2 tacct2 +set nm2 testaccta2 +set nm3 testaccta3 set des Description -set ds1 "Test Account A1" -set ds1 "TestAccount2" +set ds1 testaccounta1 +set ds2 testacct set org Organization -set or1 "Account Org A1" -set or1 "AcctOrg2" +set or1 accountorga1 +set or2 acttrg set qs QosLevel set qs1 normal +set par parent set usr user set us1 tuser1 set us2 tuser2 +set us3 tuser3 set al AdminLevel set aln None +set ala Administrator +set alo Operator set dac DefaultAccount -set par Partitions +set pts Partitions +set fs fairshare +set mc maxcpu +set mj maxjob +set mn maxnode +set mw maxwall set dbu debug set access_err 0 + print_header $test_id -if { 0 == 0 } { - send_user "\n This test is not ready\n" - exit 0 -} - +set timeout 60 + +# +# Check accounting config and bail if not found. 
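+# NOTE: a minimal sketch of the calling convention used by the helper
+# procs defined below (values are illustrative, taken from the variables
+# set above; each helper returns an error count, so results can be
+# accumulated and cleanup run on the first failure):
+#
+#	set exit_code 0
+#	incr exit_code [_add_cluster "$tc1,$tc2,$tc3"]
+#	incr exit_code [_add_acct "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3"]
+#	if { $exit_code } {
+#		_remove_acct "" "$nm1,$nm2,$nm3"
+#		_remove_cluster "$tc1,$tc2,$tc3"
+#		exit $exit_code
+#	}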
+# if { [test_account_storage] == 0 } { send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" exit 0 } - + + # # Use sacctmgr to create a cluster -# -set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] -expect { - -re "privilege to preform this action" { - set access_err 1 - exp_continue - } - -re "Adding Cluster" { - incr acmatches - exp_continue +# +proc _add_cluster {name} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set my_pid [spawn $sacctmgr -i add cluster $name] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Cluster" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + exit_code 1 + } + eof { + wait + } } - eof { - wait + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $matches\n" + incr exit_code 1 } -} -if {$access_err != 0} { - send_user "\nWARNING: not authorized to perform this test\n" - exit $exit_code -} -if {$acmatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters - got $acmatches\n" - set exit_code 1 -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + + return $exit_code } # -# Use sacctmgr to add an account +# Use sacctmgr to remove the test cluster # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] -expect { - -re "Adding Account" { - incr aamatches - exp_continue +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to perform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr 
matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } } - eof { - wait + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } } -} -if {$aamatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. - got $aamatches\n" - set exit_code 1 + return $exit_code } # -# Use sacctmgr to add a second account +# Use sacctmgr to add an account # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc2 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm2 $org="$or1" $qs=$qs1] -expect { - -re "Adding Account" { - incr aa2matches - exp_continue +proc _add_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i add account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Account" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } } -} -if {$aa2matches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. - got $aamatches\n" - set exit_code 1 + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to add a user +# Use sacctmgr to remove an account # -set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$aln $clu=$tc1 $dac=$nm2 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$us1 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm1" { - exp_continue +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code } # -# Use sacctmgr to add a second user +# Use sacctmgr to modify an account # -set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1,$nm2 $al=$aln $clu=$tc1,$tc2 $dac=$nm1 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us2 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm2" { - exp_continue +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" } - eof { - wait + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 -} + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" + } -# -# Use sacctmgr to list the test user -# -#exp_internal 1 -set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] -expect { - -re "$us1 *$nm2 *$qs1 *$aln *$tc1 *$nm1 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { - incr dlumatches - exp_continue + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 } - -re "$us2 *$nm1 *$qs1 *$aln *$tc1 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 } - -re "$us2 *$nm1 *$qs1 *$aln *$tc2 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 } - -re "$us2 *$nm1 *$qs1 *$aln *$tc2 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 } - timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 } - eof { - wait + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 } -} -if {$dlumatches != 4} { - send_user "\nFAILURE: sacctmgr had a problem listing user got $dumatches\n" - set exit_code 1 + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re 
"Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. + got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } -#exp_internal 0 + + # -# Use sacctmgr to delete the test user +# Use sacctmgr to add an user # -set sadel_pid [spawn $sacctmgr -i $del $usr $us1,$us2] -expect { - -re "Deleting users" { - incr dumatches - exp_continue +proc _add_user { account adminlevel cluster defaultaccount fs maxcpu maxjob maxnodes maxwall name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + + set command "$name" + + if { [string length $account] } { + set command "$command account=$account" } - eof { - wait + + if { [string length $adminlevel] } { + set command "$command adminlevel=$adminlevel" + } + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + } + + if { [string length $defaultaccount] } { + set command "$command defaultaccount=$defaultaccount" + } + + if { [string length $fs] } { + set command "$command fs=$fs" + } + + if { [string length $maxcpu] } { + set command "$command maxcpu=$maxcpu" + } + + if { [string length $maxjob] } { + set command "$command maxjob=$maxjob" + } + + if { [string length $maxnodes] } { + set command "$command maxnodes=$maxnodes" } -} -if {$dumatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" - set exit_code 1 + if { [string length maxwall$] } { + set command "$command maxwall=$maxwall" + } + + if { [string length $name] } { + set command "$command name=$name" + } + + set my_pid [eval spawn $sacctmgr -i add user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding User" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding user. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to delete the test account +# Use sacctmgr to remove an user # -set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] -expect { - -re "Deleting account" { - incr damatches - exp_continue +proc _remove_user { acct user } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting user" + + if { ![string length $user] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + + set command "$user" + + if { [string length $acct] } { + set command "$command account=$acct" + set check "Deleting user associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user. 
+ got $matches\n" + incr exit_code 1 } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code } -if {$damatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" - set exit_code 1 +#make sure we have a clean system and permission to do this work +_remove_user "" "$us1,$us2,$us3" +_remove_acct "" "$nm1,$nm2,$nm3" +_remove_cluster "$tc1,$tc2,$tc3" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2,$tc3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add accounts +incr exit_code [_add_acct "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add users +# account adminlevel cluster defaultaccount fs maxcpu maxjob maxnodes maxwall name +incr exit_code [_add_user "$nm1,$nm2,$nm3" "$alo" "$tc1,$tc2,$tc3" "$nm2" "$fs1" "$mc1" "$mj1" "$mn1" "$mw1" $us1,$us2,$us3] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test cluster +# Use sacctmgr to list the test user additions # -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user format="User,Def,Admin,clus,acc,fair,maxc,maxj,maxn,maxw" names=$us1,$us2,$us3 withassoc] expect { - -re "Deleting clusters" { - incr dcmatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "(($us1|$us2|$us3).$nm2.$alo.($tc1|$tc2|$tc3).($nm1|$nm2|$nm3).$fs1.$mc1.$mj1.$mn1.$mw1.)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$dcmatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" - set exit_code 1 +if {$matches != 27} { + send_user "\nFAILURE: Account addition 1 incorrect with only $matches.\n" + incr exit_code 1 } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 -} +# This is the end below here + +incr exit_code [_remove_user "" "$us1,$us2,$us3"] +incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_cluster "$tc1,$tc2,$tc3"] if {$exit_code == 0} { send_user "\nSUCCESS\n" - } else { +} else { send_user "\nFAILURE\n" - } - +} exit $exit_code + diff --git a/testsuite/expect/test21.17 b/testsuite/expect/test21.17 index 3f1b47d9a..e7376f3de 100755 --- a/testsuite/expect/test21.17 +++ b/testsuite/expect/test21.17 @@ -2,6 +2,7 @@ ############################################################################ # Purpose: Test of SLURM functionality # sacctmgr modify a user +# # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -33,365 +34,879 @@ source ./globals set test_id "21.17" set exit_code 0 -set acmatches 0 -set aamatches 0 -set aa2matches 0 -set lmatches 0 -set damatches 0 -set dcmatches 0 -set dumatches 0 -set dlumatches 0 -set not_support 0 -set add add -set lis list -set del delete -set mod modify -set nams Names -set nam Name -set fs Fairshare -set mc MaxCPUSecs -set mj MaxJobs -set mn MaxNodes -set mw MaxWall -set clu cluster -set tc1 tclus1 -set tc2 tclus2 -set tc3 tclus3 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 set fs1 2500 -set fs2 1200 +set fs2 1700 +set fs3 1 set mc1 1000000 -set mc2 200000 -set mj1 100 -set mj2 50 +set mc2 700000 +set mc3 1 +set mj1 50 +set mj2 70 +set mj3 1 set mn1 300 -set mn2 200 +set mn2 700 +set mn3 1 set mw1 01:00:00 -set mw2 02:00:00 +set mw2 00:07:00 +set mw3 00:01:00 +set clu cluster +set cl1 1tmach +set cl2 2tmach +set cl3 3tmach set acc account -set acs accounts -set ass associations -set nm1 tacct1 -set nm2 tacct2 +set acc account +set nams names +set nm1 testaccta1 +set nm2 testaccta2 +set nm3 testaccta3 set des Description -set ds1 "onestestAccount" -set ds2 "testaccount2" +set ds1 testaccounta1 +set ds2 testacct set org Organization -set or1 "oneaccountOrg" -set or2 "acctorg2" +set or1 accountorga1 +set or2 acttrg set qs QosLevel set qs1 normal +set par parent set usr user set us1 tuser1 set us2 tuser2 +set us3 tuser3 set al AdminLevel set aln None -set ala Admin +set ala Administrator set alo Operator set dac DefaultAccount -set par Partitions +set pts Partitions +set fs fairshare +set mc maxcpu +set mj maxjob +set mn maxnode +set mw maxwall set dbu debug set access_err 0 + print_header $test_id +set timeout 60 + +# +# Check accounting config and bail if not found. 
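+# NOTE: a minimal sketch of how the modify helper below assembles a
+# "set ... where ..." command line for sacctmgr (illustrative values from
+# the variables above; only the options the caller supplies are appended):
+#
+#	set scommand "set fairshare=$fs2 maxj=$mj2"
+#	set wcommand "where names='$us2' cluster=$tc1"
+#	eval spawn $sacctmgr -i modify user $scommand $wcommand
+#
+# The expected number of "Modified ..." response lines is then derived
+# from whether user fields, association fields, or both were changed.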
+# if { [test_account_storage] == 0 } { send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" exit 0 } - + + # # Use sacctmgr to create a cluster -# -set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] -expect { - -re "privilege to preform this action" { - set access_err 1 - exp_continue +# +proc _add_cluster {name} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - -re "Adding Cluster" { - incr acmatches - exp_continue + + set my_pid [spawn $sacctmgr -i add cluster $name] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Cluster" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + exit_code 1 + } + eof { + wait + } } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $matches\n" + incr exit_code 1 } - eof { - wait + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 } + + return $exit_code } -if {$access_err != 0} { - send_user "\nWARNING: not authorized to perform this test\n" - exit $exit_code -} -if {$acmatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding clusters - got $acmatches\n" - set exit_code 1 -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +# +# Use sacctmgr to remove the test cluster +# +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to perform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { 
+ if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code } # -# Use sacctmgr to add test accounts +# Use sacctmgr to add an account # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] -expect { - -re "Adding Account" { - incr aamatches - exp_continue +proc _add_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i add account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Account" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } } -} -if {$aamatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. - got $aamatches\n" - set exit_code 1 + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to add a second account +# Use sacctmgr to remove an account # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des=$ds2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$nm2 $org=$or2 $qs=$qs1] -expect { - -re "Adding Account" { - incr aa2matches - exp_continue +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. + got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } } -} -if {$aa2matches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. 
- got $aamatches\n" - set exit_code 1 + return $exit_code } # -# Use sacctmgr to add a user +# Use sacctmgr to modify an account # -set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$alo $clu=$tc1,$tc2 $dac=$nm2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us1,$us2 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm1" { - exp_continue +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" } - eof { - wait + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" + } + + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" + } + + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 + } + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 + } + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 + } + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 + } + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 + } + + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. 
+ got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } + # -# Use sacctmgr to list the test user +# Use sacctmgr to add an user # -set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] -expect { - -re "$us1" { - exp_continue +proc _add_user { cluster account name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + if { [string length $account] } { + set command "$command account=$account" } + + if { [string length $name] } { + set command "$command name=$name" + } + + set my_pid [eval spawn $sacctmgr -i add user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding User" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding user. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to modify user +# Use sacctmgr to remove an user # -set as_list_pid [spawn $sacctmgr -i $mod $usr set $al=$ala $dac=$nm1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $qs=$qs1 where $acs=$nm2 $al=$alo $clu=$tc2 $dac=$nm2 $nams=$us2 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm2" { - exp_continue +proc _remove_user { user name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting user" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $user] } { + set command "$command user=$user" + set check "Deleting user associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + return $exit_code } # -# Use sacctmgr to list the test user +# Use sacctmgr to modify an user # -set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] -expect { - -re "$us1 *$nm2 *$qs1 *$alo *$tc1 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue +proc _mod_user { adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $wnames] } { + send_user "FAILURE: we need a name to modify\n" + return 1 } - -re "$us1 *$nm2 *$qs1 *$alo *$tc1 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + #set up the where + set wcommand "where" + + if { [string length $wcluster] } { + set wcommand "$wcommand cluster=$wcluster" } - -re "$us1 *$nm2 *$qs1 *$alo *$tc2 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + if { [string length $wnames] } { + set wcommand "$wcommand names='$wnames'" } - -re "$us1 *$nm2 *$qs1 *$alo *$tc2 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + if { [string length $waccounts] } { + set wcommand "$wcommand account='$waccount'" } - -re "$us2 *$nm1 *$qs1 *$ala.* *$tc1 *$nm2 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + #set up the set + set scommand "set" + + if { [string length $adminlevel] } { + set scommand "$scommand adminlevel=$adminlevel" + set acct_stuff 1 } - -re "$us2 *$nm1 *$qs1 *$ala.* *$tc1 *$nm1 *$dbu *$fs2 *$mc2 *$mj2 *$mn2 *$mw2" { - incr dlumatches - exp_continue + + if { [string length $defaultaccount] } { + set scommand "$scommand defaultaccount='$defaultaccount'" + set acct_stuff 1 } - -re "$us2 *$nm1 *$qs1 *$ala.* *$tc2 *$nm2 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { - incr dlumatches - exp_continue + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 } - -re "$us2 *$nm1 *$qs1 *$ala.* *$tc2 *$nm1 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1" { - incr dlumatches - exp_continue + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 } - -re "$tc3" { - send_user "\nFAILURE: $tc3 found but not expected\n" - set exit_code 1 + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 } - timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 } - eof { - wait + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 } -} -if {$dlumatches != 8} { - send_user "\nFAILURE: sacctmgr had a problem listing user got $dumatches of 8\n" - set exit_code 1 -} + incr expected $acct_stuff + incr expected $assoc_stuff -# -# Use sacctmgr to delete the test user -# -set sadel_pid [spawn $sacctmgr -i $del $usr $us1,$us2] -expect { - -re "Deleting users" { - incr dumatches - exp_continue + set my_pid [eval spawn $sacctmgr -i modify user $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" 
{ + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified account associations" { + incr matches + exp_continue + } + -re "Modified users" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr modify not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } } - timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying user. + got $matches needed $expected\n" + incr exit_code 1 } - eof { - wait + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 } + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_user "" "$us1,$us2,$us3" +_remove_acct "" "$nm1,$nm2,$nm3" +_remove_cluster "$tc1,$tc2,$tc3" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} + +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2,$tc3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add accounts +incr exit_code [_add_acct "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add users +incr exit_code [_add_user "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3" "$us1,$us2,$us3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } -if {$dumatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" - set exit_code 1 +# First test change the AdminLevel and DefaultAccount of a user +# adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames +incr exit_code [_mod_user $alo $nm2 "" "" "" "" "" "" "" "$us1"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test account +# Use sacctmgr to list the test user modification # -set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user names=$us1,$us2,$us3 format="User,Def,Admin"] expect { - -re "Deleting account" { - incr damatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm2.$alo.|($us2|$us3).$nm1.$aln.)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$damatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" - set exit_code 1 +if {$matches != 3} { + send_user "\nFAILURE: Account modification 1 incorrect with only $matches.\n" + incr 
exit_code 1 } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +# Next, test change the limits of one user +# adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames + +incr exit_code [_mod_user "" "" $fs2 $mc2 $mj2 $mn2 $mw2 "" "$tc1,$tc2,$tc3" "$us2"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test cluster +# Use sacctmgr to list the test user modifications # -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list assoc users=$us1,$us2,$us3 format="User,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] expect { - -re "Deleting clusters" { - incr dcmatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us2.($tc1|$tc2|$tc3).$fs2.$mc2.$mj2.$mn2.$mw2.|($us1|$us3).($tc1|$tc2|$tc3).1.....)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$dcmatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n" - set exit_code 1 +if {$matches != 27} { + send_user "\nFAILURE: User modification 2 incorrect with only $matches.\n" + incr exit_code 1 } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 -} +# This is the end below here + +incr exit_code [_remove_user "" "$us1,$us2,$us3"] +incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"] +incr exit_code [_remove_cluster "$tc1,$tc2,$tc3"] if {$exit_code == 0} { send_user "\nSUCCESS\n" } else { send_user "\nFAILURE\n" } - exit $exit_code + diff --git a/testsuite/expect/test21.18 b/testsuite/expect/test21.18 index d7dd326f2..84662a10b 100755 --- a/testsuite/expect/test21.18 +++ b/testsuite/expect/test21.18 @@ -2,6 +2,7 @@ ############################################################################ # Purpose: Test of SLURM functionality # sacctmgr modify multiple users +# # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." 
otherwise with an explanation of the failure, OR @@ -33,365 +34,960 @@ source ./globals set test_id "21.18" set exit_code 0 -set acmatches 0 -set aamatches 0 -set aa2matches 0 -set lmatches 0 -set damatches 0 -set dcmatches 0 -set dumatches 0 -set dlumatches 0 -set not_support 0 -set add add -set lis list -set del delete -set mod modify -set nams Names -set nam Name -set fs Fairshare -set mc MaxCPUSecs -set mj MaxJobs -set mn MaxNodes -set mw MaxWall -set clu cluster -set tc1 tclus1 -set tc2 tclus2 -set tc3 tclus3 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 set fs1 2500 -set fs2 1200 +set fs2 1700 +set fs3 1 set mc1 1000000 -set mc2 200000 -set mj1 100 -set mj2 50 +set mc2 700000 +set mc3 1 +set mj1 50 +set mj2 70 +set mj3 1 set mn1 300 -set mn2 200 +set mn2 700 +set mn3 1 set mw1 01:00:00 -set mw2 02:00:00 +set mw2 00:07:00 +set mw3 00:01:00 +set clu cluster +set cl1 1tmach +set cl2 2tmach +set cl3 3tmach +set acc account set acc account -set acs accounts -set ass associations -set nm1 tacct1 -set nm2 tacct2 +set nams names +set nm1 testaccta1 +set nm2 testaccta2 +set nm3 testaccta3 set des Description -set ds1 "onestestAccount" -set ds2 "testaccount2" +set ds1 testaccounta1 +set ds2 testacct set org Organization -set or1 "oneaccountOrg" -set or2 "acctorg2" +set or1 accountorga1 +set or2 acttrg set qs QosLevel set qs1 normal +set par parent set usr user set us1 tuser1 set us2 tuser2 +set us3 tuser3 set al AdminLevel set aln None set ala Administrator set alo Operator set dac DefaultAccount -set par Partitions +set pts Partitions +set fs fairshare +set mc maxcpu +set mj maxjob +set mn maxnode +set mw maxwall set dbu debug set access_err 0 + print_header $test_id +set timeout 60 + +# +# Check accounting config and bail if not found. +# if { [test_account_storage] == 0 } { send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" exit 0 } - + + # # Use sacctmgr to create a cluster -# -set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1,$tc2,$tc3 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1] -expect { - -re "privilege to preform this action" { - set access_err 1 - exp_continue +# +proc _add_cluster {name} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - -re "Adding Cluster" { - incr acmatches - exp_continue + + set my_pid [spawn $sacctmgr -i add cluster $name] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Cluster" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + exit_code 1 + } + eof { + wait + } } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding clusters + got $matches\n" + incr exit_code 1 } - eof { - wait + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 } + + return 
-if {$access_err != 0} {
-	send_user "\nWARNING: not authorized to perform this test\n"
-	exit $exit_code
-}
-if {$acmatches != 1} {
-	send_user "\nFAILURE: sacctmgr had a problem adding clusters
-	got $acmatches\n"
-	set exit_code 1
-}
-if { ![check_acct_associations] } {
-	send_user "\nFAILURE: Our associations don't line up\n"
-	set exit_code 1
+#
+# Use sacctmgr to remove the test cluster
+#
+proc _remove_cluster {name} {
+	global access_err sacctmgr timeout
+
+	set exit_code 0
+	set matches 0
+	set nothing 0
+
+	if { ![string length $name] } {
+		send_user "FAILURE: we need a name to remove\n"
+		return 1
+	}
+
+	set my_pid [spawn $sacctmgr -i delete cluster $name]
+	expect {
+		-re "privilege to perform this action" {
+			set access_err 1
+			exp_continue
+		}
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknown problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Deleting clusters" {
+			incr matches
+			exp_continue
+		}
+		-re " Nothing deleted" {
+			incr matches
+			set nothing 1
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr delete not responding\n"
+			slow_kill $my_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+	if {$access_err != 0} {
+		return 1
+	}
+	if {$matches != 1} {
+		send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n"
+		incr exit_code 1
+	}
+	if { !$nothing } {
+		if { ![check_acct_associations] } {
+			send_user "\nFAILURE: Our associations don't line up\n"
+			incr exit_code 1
+		}
+	}
+
+	return $exit_code
+}

#
-# Use sacctmgr to add test accounts
+# Use sacctmgr to add an account
#
-set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1]
-expect {
-	-re "Adding Account" {
-		incr aamatches
-		exp_continue
+proc _add_acct { cluster name } {
+	global sacctmgr timeout
+
+	set exit_code 0
+	set matches 0
+
+	if { ![string length $name] } {
+		send_user "FAILURE: we need a name to add\n"
+		return 1
	}
-	timeout {
-		send_user "\nFAILURE: sacctmgr add not responding\n"
-		slow_kill $sadd_pid
-		set exit_code 1
+
+	set command "$name"
+
+	if { [string length $cluster] } {
+		set command "$command cluster=$cluster"
	}
-	eof {
-		wait
+
+	set my_pid [eval spawn $sacctmgr -i add account $command]
+	expect {
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknown problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Adding Account" {
+			incr matches
+			exp_continue
+		}
+		-re "Associations" {
+			incr matches
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr add not responding\n"
+			slow_kill $my_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
	}
-}
-if {$aamatches != 1} {
-	send_user "\nFAILURE: sacctmgr had a 
problem adding account. - got $aamatches\n" - set exit_code 1 + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to add a second account +# Use sacctmgr to remove an account # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1,$tc2,$tc3 $des=$ds2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$nm2 $org=$or2 $qs=$qs1] -expect { - -re "Adding Account" { - incr aa2matches - exp_continue +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } } -} -if {$aa2matches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. - got $aamatches\n" - set exit_code 1 + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code } # -# Use sacctmgr to add a user +# Use sacctmgr to modify an account # -set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$alo $clu=$tc1,$tc2 $dac=$nm2 $fs=$fs2 $mc=$mc2 $mj=$mj2 $mn=$mn2 $mw=$mw2 $nams=$us1,$us2 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm1" { - exp_continue +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" } - eof { - wait + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" + } + + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 + } + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 + } + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 + } + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 + } + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 + } + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 + } + + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. 
+ got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } + # -# Use sacctmgr to list the test user +# Use sacctmgr to add an user # -set as_list_pid [spawn $sacctmgr list $usr $nams=$us1,$us2 WithAssocs] -expect { - -re "$us1" { - exp_continue +proc _add_user { cluster account name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + if { [string length $account] } { + set command "$command account=$account" } + + if { [string length $name] } { + set command "$command name=$name" + } + + set my_pid [eval spawn $sacctmgr -i add user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding User" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding user. 
+ got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to modify user +# Use sacctmgr to remove an user # -set as_list_pid [spawn $sacctmgr -i $mod $usr set $al=$ala $dac=$nm1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $qs=$qs1 where $acs=$nm2 $al=$alo $clu=$tc2 $dac=$nm2 $nams=$us1,$us2 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm2" { - exp_continue +proc _remove_user { user name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting user" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $user] } { + set command "$command user=$user" + set check "Deleting user associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user. 
+ got $matches\n"
+		incr exit_code 1
	}
-}
-if { ![check_acct_associations] } {
-	send_user "\nFAILURE: Our associations don't line up\n"
-	set exit_code 1
+	if { !$nothing } {
+		if { ![check_acct_associations] } {
+			send_user "\nFAILURE: Our associations don't line up\n"
+			incr exit_code 1
+		}
+	}
+
+	return $exit_code
+}
+
#
-# Use sacctmgr to list the test user
+# Use sacctmgr to modify a user
#
-set as_list_pid [spawn $sacctmgr -n -p list $usr $nams=$us1,$us2 WithAssocs]
-expect {
-	-re "$us1.$nm1.$qs1.$ala.$tc1.$nm2.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" {
-		incr dlumatches
-		exp_continue
+proc _mod_user { adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames} {
+	global sacctmgr timeout
+
+	set exit_code 0
+	set matches 0
+	set expected 0
+	set acct_stuff 0
+	set assoc_stuff 0
+
+	if { ![string length $wnames] } {
+		send_user "FAILURE: we need a name to modify\n"
+		return 1
	}
-	-re "$us1.$nm1.$qs1.$ala.$tc1.$nm1.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" {
-		incr dlumatches
-		exp_continue
+
+	#set up the where
+	set wcommand "where"
+
+	if { [string length $wcluster] } {
+		set wcommand "$wcommand cluster=$wcluster"
	}
-	-re "$us1.$nm1.$qs1.$ala.$tc2.$nm2.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" {
-		incr dlumatches
-		exp_continue
+
+	if { [string length $wnames] } {
+		set wcommand "$wcommand names='$wnames'"
	}
-	-re "$us1.$nm1.$qs1.$ala.$tc2.$nm1.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" {
-		incr dlumatches
-		exp_continue
+
+	if { [string length $waccounts] } {
+		set wcommand "$wcommand account='$waccounts'"
	}
-	-re "$us2.$nm1.$qs1.$ala.$tc1.$nm2.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" {
-		incr dlumatches
-		exp_continue
+
+	#set up the set
+	set scommand "set"
+
+	if { [string length $adminlevel] } {
+		set scommand "$scommand adminlevel=$adminlevel"
+		set acct_stuff 1
	}
-	-re "$us2.$nm1.$qs1.$ala.$tc1.$nm1.$dbu.$fs2.$mc2.$mj2.$mn2.$mw2" {
-		incr dlumatches
-		exp_continue
+
+	if { [string length $defaultaccount] } {
+		set scommand "$scommand defaultaccount='$defaultaccount'"
+		set acct_stuff 1
	}
-	-re "$us2.$nm1.$qs1.$ala.$tc2.$nm2.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" {
-		incr dlumatches
-		exp_continue
+
+	if { [string length $fs] } {
+		set scommand "$scommand fairshare=$fs"
+		set assoc_stuff 1
	}
-	-re "$us2.$nm1.$qs1.$ala.$tc2.$nm1.$dbu.$fs1.$mc1.$mj1.$mn1.$mw1" {
-		incr dlumatches
-		exp_continue
+
+	if { [string length $maxcpu] } {
+		set scommand "$scommand maxc=$maxcpu"
+		set assoc_stuff 1
+	}
+
+	if { [string length $maxjob] } {
+		set scommand "$scommand maxj=$maxjob"
+		set assoc_stuff 1
+	}
+
+	if { [string length $maxnodes] } {
+		set scommand "$scommand maxn=$maxnodes"
+		set assoc_stuff 1
	}
-	-re "$tc3" {
-		send_user "\nFAILURE: $tc3 found but not expected\n"
+
+	if { [string length $maxwall] } {
+		set scommand "$scommand maxw=$maxwall"
+		set assoc_stuff 1
+	}
+
+	incr expected $acct_stuff
+	incr expected $assoc_stuff
+
+	set my_pid [eval spawn $sacctmgr -i modify user $scommand $wcommand ]
+	expect {
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknown problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Modified account associations" {
+			incr matches
+			exp_continue
+		}
+		-re 
"Modified users" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr modify not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying user. + got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_user "" "$us1,$us2,$us3" +_remove_acct "" "$nm1,$nm2,$nm3" +_remove_cluster "$tc1,$tc2,$tc3" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} + +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2,$tc3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add accounts +incr exit_code [_add_acct "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add users +incr exit_code [_add_user "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3" "$us1,$us2,$us3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +# First test change the AdminLevel and DefaultAccount of the accounts +# adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames +incr exit_code [_mod_user $alo $nm2 "" "" "" "" "" "" "" "$us1,$us2,$us3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +# +# Use sacctmgr to list the test account modifications +# +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user names=$us1,$us2,$us3 format="User,Def,Admin"] +expect { + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1|$us2|$us3).$nm2.$alo." 
{ + incr matches exp_continue - set exit_code 1 } timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$dlumatches != 8} { - send_user "\nFAILURE: sacctmgr had a problem finding all user listing getting $dlumatches instead of 8\n" - set exit_code 1 +if {$matches != 3} { + send_user "\nFAILURE: Account modification 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# Test change the AdminLevel and DefaultAccount of two accounts +# adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames +incr exit_code [_mod_user $ala $nm3 "" "" "" "" "" "" "" "$us1,$us3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test user +# Use sacctmgr to list the test account modifications # -set sadel_pid [spawn $sacctmgr -i $del $usr $us1,$us2] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user names=$us1,$us2,$us3 format="User,Def,Admin"] expect { - -re "Deleting users" { - incr dumatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "(($us1|$us3).$nm3.$ala.|$us2.$nm2.$alo.)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$dumatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" - set exit_code 1 +if {$matches != 3} { + send_user "\nFAILURE: Account modification 1 incorrect with only $matches.\n" + incr exit_code 1 +} + +# Next, test change the limits of the users +# adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames + +incr exit_code [_mod_user "" "" $fs2 $mc2 $mj2 $mn2 $mw2 "" "$tc1,$tc2" "$us1,$us2,$us3"] +if { $exit_code } { + _remove_user "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test account +# Use sacctmgr to list the test user modifications # -set sadel_pid [spawn $sacctmgr -i $del $acc $nm1,$nm2] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list assoc users=$us1,$us2,$us3 format="User,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"] expect { - -re "Deleting account" { - incr damatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "(($us1|$us2|$us3).($tc1|$tc2).$fs2.$mc2.$mj2.$mn2.$mw2.)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$damatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" - set exit_code 1 +if {$matches != 18} { + send_user "\nFAILURE: User modification 2 incorrect with only $matches.\n" + incr exit_code 1 } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +# Next, test change the limits of two users +# adminlevel defaultaccount fs maxcpu maxjob 
maxnodes maxwall waccounts wcluster wnames
+
+incr exit_code [_mod_user "" "" $fs3 $mc3 $mj3 $mn3 $mw3 "" "" "$us1,$us3"]
+if { $exit_code } {
+	_remove_user "" "$us1,$us2,$us3"
+	_remove_acct "" "$nm1,$nm2,$nm3"
+	_remove_cluster "$tc1,$tc2,$tc3"
+	exit $exit_code
+}

#
-# Use sacctmgr to delete the test cluster
+# Use sacctmgr to list the test user modifications
#
-set sadel_pid [spawn $sacctmgr -i $del $clu $tc1,$tc2,$tc3]
+set matches 0
+set my_pid [eval spawn $sacctmgr -n -p list assoc user=$us1,$us2,$us3 format="User,Cluster,Fairshare,MaxC,MaxJ,MaxN,MaxW"]
expect {
-	-re "Deleting clusters" {
-		incr dcmatches
+	-re "There was a problem" {
+		send_user "FAILURE: there was a problem with the sacctmgr command\n"
+		incr exit_code 1
+	}
+	# When using () alternation, combine all of the expected variants into
+	# this one pattern; output consumed by one match can otherwise be
+	# thrown away before a separate pattern gets to see it.
+	-re "(($us1|$us3).($tc1|$tc2).$fs3.$mc3.$mj3.$mn3.$mw3.|$us2.($tc1|$tc2).$fs2.$mc2.$mj2.$mn2.$mw2.)" {
+		incr matches
		exp_continue
	}
	timeout {
-		send_user "\nFAILURE: sacctmgr delete not responding\n"
-		slow_kill $sadel_pid
-		set exit_code 1
+		send_user "\nFAILURE: sacctmgr list associations not responding\n"
+		slow_kill $my_pid
+		incr exit_code 1
	}
	eof {
		wait
	}
}
-if {$dcmatches != 1} {
-	send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n"
-	set exit_code 1
+if {$matches != 18} {
+	send_user "\nFAILURE: User modification 3 incorrect with $matches.\n"
+	incr exit_code 1
}
-if { ![check_acct_associations] } {
-	send_user "\nFAILURE: Our associations don't line up\n"
-	set exit_code 1
-}
+# End of testing; clean up the test entities below
+
+incr exit_code [_remove_user "" "$us1,$us2,$us3"]
+incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"]
+incr exit_code [_remove_cluster "$tc1,$tc2,$tc3"]

if {$exit_code == 0} {
	send_user "\nSUCCESS\n"
-	} else {
+} else {
	send_user "\nFAILURE\n"
-	}
-
+}
exit $exit_code
+
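The list checks in test21.18 above count every expected output line with a single -re whose alternatives are OR'd inside one group. Because expect discards whatever buffer contents a match consumes, splitting the variants across separate -re patterns can silently drop output that another pattern still needed; folding them into one alternation and counting every hit avoids the loss. A minimal standalone sketch of the idiom (the three echoed words are hypothetical stand-ins for sacctmgr output):

	set matches 0
	spawn echo "alpha beta gamma"
	expect {
		-re "(alpha|beta|gamma)" {
			incr matches
			exp_continue
		}
		eof {
			wait
		}
	}
	if {$matches != 3} {
		send_user "\nFAILURE: expected 3 matches, got $matches\n"
	}
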
diff --git a/testsuite/expect/test21.19 b/testsuite/expect/test21.19
index 789a61d9f..4ef4e6773 100755
--- a/testsuite/expect/test21.19
+++ b/testsuite/expect/test21.19
@@ -1,7 +1,8 @@
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
-#          sacctmgr add/delete coordinator
+#          sacctmgr add a coordinator
+#
#
# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
#          "FAILURE: ..." otherwise with an explanation of the failure, OR
@@ -33,54 +34,66 @@ source ./globals

set test_id     "21.19"
set exit_code   0
-set acmatches   0
-set aamatches   0
-set camatches   0
-set cumatches   0
-set lmatches    0
-set damatches   0
-set dcmatches   0
-set dumatches   0
-set not_support 0
-set add add
-set lis list
-set del delete
-set nams Names
-set nam Name
-set fs Fairshare
-set mc MaxCPUSecs
-set mj MaxJobs
-set mn MaxNodes
-set mw MaxWall
-set clu cluster
set tc1 tcluster1
+set tc2 tcluster2
+set tc3 tcluster3
set fs1 2500
+set fs2 1700
+set fs3 1
set mc1 1000000
+set mc2 700000
+set mc3 1
set mj1 50
+set mj2 70
+set mj3 1
set mn1 300
+set mn2 700
+set mn3 1
set mw1 01:00:00
+set mw2 00:07:00
+set mw3 00:01:00
+set clu cluster
+set cl1 1tmach
+set cl2 2tmach
+set cl3 3tmach
set acc account
-set acs accounts
-set ass associations
-set nm1 tacct1
+set nams names
+set nm1 testaccta1
+set nm2 testaccta2
+set nm3 testaccta3
set des Description
-set ds1 "tacct1"
+set ds1 testaccounta1
+set ds2 testacct
set org Organization
-set or1 "acctorg2"
+set or1 accountorga1
+set or2 acttrg
set qs QosLevel
set qs1 normal
+set par parent
set usr user
set us1 tuser1
+set us2 tuser2
+set us3 tuser3
set al AdminLevel
set aln None
+set ala Administrator
+set alo Operator
set dac DefaultAccount
-set cor Coordinator
-set par Partition
+set pts Partitions
+set fs fairshare
+set mc maxcpu
+set mj maxjob
+set mn maxnode
+set mw maxwall
set dbu debug
set access_err 0
+
print_header $test_id
+set timeout 60
+
#
# Check accounting config and bail if not found.
#
@@ -88,274 +101,969 @@ if { [test_account_storage] == 0 } {
	send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
	exit 0
}
-
+
#
# Use sacctmgr to create a cluster
-#
-set sadd_pid [spawn $sacctmgr -i add $clu $nams=$tc1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1]
-expect {
-	-re "privilege to preform this action" {
-		set access_err 1
-		exp_continue
+#
+proc _add_cluster {name} {
+	global sacctmgr timeout
+
+	set exit_code 0
+	set matches 0
+
+	if { ![string length $name] } {
+		send_user "FAILURE: we need a name to add\n"
+		return 1
	}
-	-re "Adding Cluster" {
-		incr acmatches
-		exp_continue
+
+	set my_pid [spawn $sacctmgr -i add cluster $name]
+	expect {
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknown problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Adding Cluster" {
+			incr matches
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr add not responding\n"
+			slow_kill $my_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
	}
-	timeout {
-		send_user "\nFAILURE: sacctmgr add not responding\n"
-		slow_kill $sadd_pid
-		set exit_code 1
+
+	if {$matches != 1} {
+		send_user "\nFAILURE: sacctmgr had a problem adding clusters
+	got $matches\n"
+		incr exit_code 1
	}
-	eof {
-		wait
+
+	if { ![check_acct_associations] } {
+		send_user "\nFAILURE: Our associations don't line up\n"
+		incr exit_code 1
	}
+
+	return $exit_code
+}
-if {$access_err != 0} {
-	send_user "\nWARNING: not authorized to perform this test\n"
-	exit $exit_code
-}
-if {$acmatches != 1} {
-	send_user "\nFAILURE: sacctmgr had a problem 
adding clusters - got $acmatches\n" - set exit_code 1 -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +# +# Use sacctmgr to remove the test cluster +# +proc _remove_cluster {name} { + global access_err sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 + } + + set my_pid [spawn $sacctmgr -i delete cluster $name] + expect { + -re "privilege to perform this action" { + set access_err 1 + exp_continue + } + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Deleting clusters" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + if {$access_err != 0} { + return 1 + } + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n" + incr exit_code 1 + } + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code } # # Use sacctmgr to add an account # -set sadel_pid [spawn $sacctmgr -i $add $acc $clu=$tc1 $des="$ds1" $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$nm1 $org="$or1" $qs=$qs1] -expect { - -re "Adding Account" { - incr aamatches - exp_continue +proc _add_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr add not responding\n" - slow_kill $sadd_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i add account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Account" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding account. + got $matches\n" + incr exit_code 1 } -} -if {$aamatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem adding account. 
- got $aamatches\n" - set exit_code 1 + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to add a user +# Use sacctmgr to remove an account # -set as_list_pid [spawn $sacctmgr -i $add $usr $acs=$nm1 $al=$aln $clu=$tc1 $dac=$nm1 $fs=$fs1 $mc=$mc1 $mj=$mj1 $mn=$mn1 $mw=$mw1 $nams=$us1 $par=$dbu $qs=$qs1 ] -expect { - -re "$nams *$nm1" { - exp_continue +proc _remove_acct { cluster name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting account" + + if { ![string length $name] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting account associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete account $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting account. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + return $exit_code } # -# Use sacctmgr to add a coordinator +# Use sacctmgr to modify an account # -set as_list_pid [spawn $sacctmgr -i $add $cor $acs=$nm1 $nams=$us1 ] -expect { - -re "Adding *$cor" { - exp_continue +proc _mod_acct { cluster name desc org parent fs maxcpu maxjob maxnodes maxwall wdesc worg} { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set expected 0 + set acct_stuff 0 + set assoc_stuff 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to modify\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list associations not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + #set up the where + set wcommand "where $name" + + if { [string length $cluster] } { + set wcommand "$wcommand cluster=$cluster" } - eof { - wait + + if { [string length $wdesc] } { + set wcommand "$wcommand description='$wdesc'" } -} -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 + if { [string length $worg] } { + set wcommand "$wcommand organization='$worg'" + } + + #set up the set + set scommand "set" + if { [string length $parent] } { + set scommand "$scommand parent=$parent" + set assoc_stuff 1 + } + + if { [string length $fs] } { + set scommand "$scommand fairshare=$fs" + set assoc_stuff 1 + } + + if { [string length $maxcpu] } { + set scommand "$scommand maxc=$maxcpu" + set assoc_stuff 1 + } + + if { [string length $maxjob] } { + set scommand "$scommand maxj=$maxjob" + set assoc_stuff 1 + } + + if { [string length $maxnodes] } { + set scommand "$scommand maxn=$maxnodes" + set assoc_stuff 1 + } + + if { [string length $maxwall] } { + set scommand "$scommand maxw=$maxwall" + set assoc_stuff 1 + } + + if { [string length $desc] } { + set scommand "$scommand description='$desc'" + set acct_stuff 1 + } + + if { [string length $org] } { + set scommand "$scommand organization='$org'" + set acct_stuff 1 + } + + incr expected $acct_stuff + incr expected $assoc_stuff + + set my_pid [eval spawn $sacctmgr -i modify account $scommand $wcommand ] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Modified accounts" { + incr matches + exp_continue + } + -re "Modified account associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != $expected} { + send_user "\nFAILURE: sacctmgr had a problem modifying account. 
+ got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } + # -# Use sacctmgr to list the test coordinator user +# Use sacctmgr to add an user # -set as_list_pid [spawn $sacctmgr -n list $usr withcoor WithAssocs] -expect { - -re "$us1 *$nm1 *$qs1 *None *$tc1 *$nm1 *$dbu *$fs1 *$mc1 *$mj1 *$mn1 *$mw1 *$nm1" { - incr cumatches - exp_continue +proc _add_user { cluster account name } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $name] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr list user not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + set command "$name" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" } - eof { - wait + + if { [string length $account] } { + set command "$command account=$account" + } + + if { [string length $name] } { + set command "$command name=$name" } -} -if {$cumatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem finding coordinator user. - got $aamatches\n" - set exit_code 1 + set my_pid [eval spawn $sacctmgr -i add user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding User" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 2} { + send_user "\nFAILURE: sacctmgr had a problem adding user. + got $matches\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to list the test coordinator account. 
+# Use sacctmgr to remove an user # -set as_list_pid [spawn $sacctmgr -n list $acc withcoor WithAssocs] -expect { - -re "$nm1 *$ds1 *$or1 *$qs1 *$tc1 *root *$fs1 *$mc1 *$mj1 *$mn1 *$mw1 $us1" { - incr camatches - exp_continue +proc _remove_user { cluster acct user } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + set nothing 1 + set check "Deleting user" + + if { ![string length $user] } { + send_user "FAILURE: we need a name to remove\n" + return 1 } - -re "$nm1 *$ds1 *$or1 *$qs1 *$tc1 *$us1 *$fs1 *$mc1 *$mj1 *$mn1 *$mw1 $us1" { - incr camatches - exp_continue + + set command "$user" + + if { [string length $cluster] } { + set command "$command cluster=$cluster" + set check "Deleting user associations" } - timeout { - send_user "\nFAILURE: sacctmgr list account not responding\n" - slow_kill $as_list_pid - set exit_code 1 + + if { [string length $acct] } { + set command "$command acct=$acct" + set check "Deleting user associations" } - eof { - wait + + set my_pid [eval spawn $sacctmgr -i delete user $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknown problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "$check" { + incr matches + exp_continue + } + -re " Nothing deleted" { + incr matches + set nothing 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr delete not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem deleting user. 
+ got $matches\n"
+		incr exit_code 1
	}
+
+	if { !$nothing } {
+		if { ![check_acct_associations] } {
+			send_user "\nFAILURE: Our associations don't line up\n"
+			incr exit_code 1
+		}
+	}
+
+	return $exit_code
+}
-if {$camatches != 2} {
-	send_user "\nFAILURE: sacctmgr had a problem finding coordinator account.\n"
-	set exit_code 1
+#
+# Use sacctmgr to modify a user
+#
+proc _mod_user { adminlevel defaultaccount fs maxcpu maxjob maxnodes maxwall waccounts wcluster wnames} {
+	global sacctmgr timeout
+
+	set exit_code 0
+	set matches 0
+	set expected 0
+	set acct_stuff 0
+	set assoc_stuff 0
+
+	if { ![string length $wnames] } {
+		send_user "FAILURE: we need a name to modify\n"
+		return 1
+	}
+
+	#set up the where
+	set wcommand "where"
+
+	if { [string length $wcluster] } {
+		set wcommand "$wcommand cluster=$wcluster"
+	}
+
+	if { [string length $wnames] } {
+		set wcommand "$wcommand names='$wnames'"
+	}
+
+	if { [string length $waccounts] } {
+		set wcommand "$wcommand account='$waccounts'"
+	}
+
+	#set up the set
+	set scommand "set"
+
+	if { [string length $adminlevel] } {
+		set scommand "$scommand adminlevel=$adminlevel"
+		set acct_stuff 1
+	}
+
+	if { [string length $defaultaccount] } {
+		set scommand "$scommand defaultaccount='$defaultaccount'"
+		set acct_stuff 1
+	}
+
+	if { [string length $fs] } {
+		set scommand "$scommand fairshare=$fs"
+		set assoc_stuff 1
+	}
+
+	if { [string length $maxcpu] } {
+		set scommand "$scommand maxc=$maxcpu"
+		set assoc_stuff 1
+	}
+
+	if { [string length $maxjob] } {
+		set scommand "$scommand maxj=$maxjob"
+		set assoc_stuff 1
+	}
+
+	if { [string length $maxnodes] } {
+		set scommand "$scommand maxn=$maxnodes"
+		set assoc_stuff 1
+	}
+
+	if { [string length $maxwall] } {
+		set scommand "$scommand maxw=$maxwall"
+		set assoc_stuff 1
+	}
+
+	incr expected $acct_stuff
+	incr expected $assoc_stuff
+
+	set my_pid [eval spawn $sacctmgr -i modify user $scommand $wcommand ]
+	expect {
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknown problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "Modified account associations" {
+			incr matches
+			exp_continue
+		}
+		-re "Modified users" {
+			incr matches
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr modify not responding\n"
+			slow_kill $my_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+
+	if {$matches != $expected} {
+		send_user "\nFAILURE: sacctmgr had a problem modifying user. 
+ got $matches needed $expected\n" + incr exit_code 1 + } + + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + return $exit_code } # -# Use sacctmgr to delete the test coordinator +# Use sacctmgr to add a coordinator # -set sadel_pid [spawn $sacctmgr -i $del $cor $us1] -expect { - -re "Deleting users" { - incr dumatches - exp_continue +proc _add_coor { accounts names } { + global sacctmgr timeout + + set exit_code 0 + set matches 0 + + if { ![string length $names] } { + send_user "FAILURE: we need a name to add\n" + return 1 } - timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + + set command "$names" + + if { [string length $accounts] } { + set command "$command accounts=$accounts" } - eof { - wait + +# if { [string length $names] } { +# set command "$command names=$names" +# } + + set my_pid [eval spawn $sacctmgr -i add coordinator $command] + expect { + -re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "Problem getting" { + send_user "FAILURE: there was a problem getting information from the database\n" + incr exit_code 1 + } + -re "Problem adding" { + send_user "FAILURE: there was an unknwon problem\n" + incr exit_code 1 + } + -re "No associations" { + send_user "FAILURE: your command didn't return anything\n" + incr exit_code 1 + } + -re "Adding Coordinator" { + incr matches + exp_continue + } + -re "Associations" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr add not responding\n" + slow_kill $my_pid + incr exit_code 1 + } + eof { + wait + } + } + + if {$matches != 1} { + send_user "\nFAILURE: sacctmgr had a problem adding coordinator. 
+ got $matches\n"
+		incr exit_code 1
+	}
+
+	if { ![check_acct_associations] } {
+		send_user "\nFAILURE: Our associations don't line up\n"
+		incr exit_code 1
	}
+	return $exit_code
}

#
-# Use sacctmgr to delete the test user
+# Use sacctmgr to remove a coordinator
#
-set sadel_pid [spawn $sacctmgr -i $del $usr $us1]
-expect {
-	-re "Deleting users" {
-		incr dumatches
-		exp_continue
+proc _remove_coor { accounts names } {
+	global sacctmgr timeout
+
+	set exit_code 0
+	set matches 0
+	set nothing 0
+	set check "Deleting coordinator associations"
+
+	if { ![string length $names] } {
+		send_user "FAILURE: we need a name to remove\n"
+		return 1
	}
-	timeout {
-		send_user "\nFAILURE: sacctmgr delete not responding\n"
-		slow_kill $sadel_pid
-		set exit_code 1
+
+	set command "$names"
+
+	if { [string length $accounts] } {
+		set command "$command accounts=$accounts"
	}
-	eof {
-		wait
+
+	set my_pid [eval spawn $sacctmgr -i delete coordinator $command]
+	expect {
+		-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+			incr exit_code 1
+		}
+		-re "Problem getting" {
+			send_user "FAILURE: there was a problem getting information from the database\n"
+			incr exit_code 1
+		}
+		-re "Problem adding" {
+			send_user "FAILURE: there was an unknown problem\n"
+			incr exit_code 1
+		}
+		-re "No associations" {
+			send_user "FAILURE: your command didn't return anything\n"
+			incr exit_code 1
+		}
+		-re "$check" {
+			incr matches
+			exp_continue
+		}
+		-re " Nothing deleted" {
+			incr matches
+			set nothing 1
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr delete not responding\n"
+			slow_kill $my_pid
+			incr exit_code 1
+		}
+		eof {
+			wait
+		}
+	}
+
+	if {$matches != 1} {
+		send_user "\nFAILURE: sacctmgr had a problem deleting coordinator. 
+ got $matches\n" + incr exit_code 1 + } + + if { !$nothing } { + if { ![check_acct_associations] } { + send_user "\nFAILURE: Our associations don't line up\n" + incr exit_code 1 + } + } + + return $exit_code +} + +#make sure we have a clean system and permission to do this work +_remove_user "" "" "$us1,$us2,$us3" +_remove_acct "" "$nm1,$nm2,$nm3" +_remove_cluster "$tc1,$tc2,$tc3" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} + +#add cluster +incr exit_code [_add_cluster "$tc1,$tc2,$tc3"] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add accounts +incr exit_code [_add_acct "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3"] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add users +incr exit_code [_add_user "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3" "$us1,$us2,$us3"] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } -if {$dumatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting user got $dumatches\n" - set exit_code 1 +# Add a coordinator +# accounts names +incr exit_code [_add_coor $nm1 $us1] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test account +# Use sacctmgr to list the test user modification # -set sadel_pid [spawn $sacctmgr -i $del $acc $nm1] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user names=$us1,$us2,$us3 withcoor] expect { - -re "Deleting account" { - incr damatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm1.$qs1.$aln.$nm1.|($us2|$us3).$nm1.$qs1.$aln..)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations not responding\n" + slow_kill $my_pid + incr exit_code 1 } eof { wait } } -if {$damatches != 1} { - send_user "\nFAILURE: sacctmgr had a problem deleting account got $damatches\n" - set exit_code 1 +if {$matches != 3} { + send_user "\nFAILURE: Coordinator add 3 incorrect with only $matches.\n" + incr exit_code 1 } -if { ![check_acct_associations] } { - send_user "\nFAILURE: Our associations don't line up\n" - set exit_code 1 +# Next, add coordinator to two users +incr exit_code [_add_coor "$nm1,$nm3" "$us2,$us3"] +if { $exit_code } { + _remove_user "" "" "$us1,$us2,$us3" + _remove_acct "" "$nm1,$nm2,$nm3" + _remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code } # -# Use sacctmgr to delete the test cluster +# Use sacctmgr to list the test coordinator additions # -set sadel_pid [spawn $sacctmgr -i $del $clu $tc1] +set matches 0 +set my_pid [spawn $sacctmgr -n -p list user names=$us1,$us2,$us3 withcoor] expect { - -re "Deleting clusters" { - incr dcmatches + -re "There was a problem" { + send_user "FAILURE: there was a problem with the sacctmgr command\n" + incr exit_code 1 + } + -re "($us1.$nm1.$qs1.$aln.$nm1.|($us2|$us3).$nm1.$qs1.$aln.$nm1,$nm3)" { + incr matches exp_continue } timeout { - send_user "\nFAILURE: sacctmgr delete not responding\n" - slow_kill $sadel_pid - set exit_code 1 + send_user "\nFAILURE: sacctmgr list associations 
not responding\n"
+		slow_kill $my_pid
+		incr exit_code 1
	}
	eof {
		wait
	}
}
-if {$dcmatches != 1} {
-	send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $dcmatches\n"
-	set exit_code 1
+if {$matches != 3} {
+	send_user "\nFAILURE: second coordinator add incorrect with only $matches.\n"
+	incr exit_code 1
}
-if { ![check_acct_associations] } {
-	send_user "\nFAILURE: Our associations don't line up\n"
-	set exit_code 1
-}
+# End of testing; clean up the test entities below
+
+incr exit_code [_remove_user "" "" "$us1,$us2,$us3"]
+incr exit_code [_remove_acct "" "$nm1,$nm2,$nm3"]
+incr exit_code [_remove_cluster "$tc1,$tc2,$tc3"]

if {$exit_code == 0} {
	send_user "\nSUCCESS\n"
-	} else {
+} else {
	send_user "\nFAILURE\n"
-	}
-
+}
exit $exit_code
+
diff --git a/testsuite/expect/test21.5 b/testsuite/expect/test21.5
index 2a81fc0c1..4110ad118 100755
--- a/testsuite/expect/test21.5
+++ b/testsuite/expect/test21.5
@@ -59,11 +59,75 @@ set access_err 0

print_header $test_id

+#
+# Check accounting config and bail if not found.
+#
if { [test_account_storage] == 0 } {
	send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n"
	exit 0
}
-
+
+#
+# Use sacctmgr to delete the test cluster, in case one was left over
+# from a previous run
+#
+set nothing 0
+set matches 0
+
+set sadel_pid [spawn $sacctmgr -i $del $clu $tc1]
+expect {
+	-re "privilege to perform this action" {
+		set access_err 1
+		exp_continue
+	}
+	-re "(There was a problem|Unknown condition|Bad format on|Bad MaxWall|Unknown option)" {
+		send_user "FAILURE: there was a problem with the sacctmgr command\n"
+		incr exit_code 1
+	}
+	-re "Problem getting" {
+		send_user "FAILURE: there was a problem getting information from the database\n"
+		incr exit_code 1
+	}
+	-re "Problem adding" {
+		send_user "FAILURE: there was an unknown problem\n"
+		incr exit_code 1
+	}
+	-re "No associations" {
+		send_user "FAILURE: your command didn't return anything\n"
+		incr exit_code 1
+	}
+	-re "Deleting clusters" {
+		incr matches
+		exp_continue
+	}
+	-re " Nothing deleted" {
+		incr matches
+		set nothing 1
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: sacctmgr delete not responding\n"
+		slow_kill $sadel_pid
+		incr exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$access_err != 0} {
+	send_user "\nWARNING: not authorized to perform this test\n"
+	exit $exit_code
+}
+if {$matches != 1} {
+	send_user "\nFAILURE: sacctmgr had a problem deleting cluster got $matches\n"
+	incr exit_code 1
+}
+if { !$nothing } {
+	if { ![check_acct_associations] } {
+		send_user "\nFAILURE: Our associations don't line up\n"
+		incr exit_code 1
+	}
+}
+
#
# Use sacctmgr to create a cluster
#
@@ -126,6 +190,7 @@ if { ![check_acct_associations] } {
	send_user "\nFAILURE: Our associations don't line up\n"
	set exit_code 1
}
+
#
# Use sacctmgr to delete the test cluster
#
diff --git a/testsuite/expect/test6.11 b/testsuite/expect/test6.11
index c32482491..7a8575c57 100755
--- a/testsuite/expect/test6.11
+++ b/testsuite/expect/test6.11
@@ -66,7 +66,7 @@ if {$job_id == 0} {
#
# Test basic scancel
#
-spawn $scancel $job_id
+spawn $scancel -v $job_id
expect {
	-re "error" {
		set exit_code 1
@@ -85,7 +85,7 @@ expect {
# Test basic scancel with job already killed
#
set matches 0
-spawn $scancel $job_id
+spawn $scancel -v $job_id
expect {
	-re "error:" {
		send_user "This error is expected, no worries\n"
diff --git a/testsuite/expect/test7.11 b/testsuite/expect/test7.11
index 196d75ce6..dd278611b 100755
--- a/testsuite/expect/test7.11
+++ b/testsuite/expect/test7.11
@@ -127,6 +127,10 @@ expect {
		incr matches
		exp_continue
	}
+	-re "Registered component of slurm test suite" {
+		incr matches
+		exp_continue
+	}
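+	# Two matches are expected now: the statically declared spank option and
+	# the option registered at runtime by test7.11.prog.c each print a help
+	# line in the srun help output.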
timeout { send_user "\nFAILURE: srun not responding\n" slow_kill $srun_pid @@ -136,7 +140,7 @@ expect { wait } } -if {$matches != 1} { +if {$matches != 2} { send_user "\nFAILURE: spank help message not in srun help message\n" set exit_code 1 } diff --git a/testsuite/expect/test7.11.prog.c b/testsuite/expect/test7.11.prog.c index 914960617..954e8a1bf 100644 --- a/testsuite/expect/test7.11.prog.c +++ b/testsuite/expect/test7.11.prog.c @@ -52,6 +52,14 @@ struct spank_option spank_options[] = }, SPANK_OPTIONS_TABLE_END }; +struct spank_option spank_options_reg[] = +{ + { "test_suite_reg", "[opt_arg]", + "Registered component of slurm test suite.", 2, 0, + _test_opt_process + }, + SPANK_OPTIONS_TABLE_END +}; static int _test_opt_process(int val, const char *optarg, int remote) { @@ -65,6 +73,8 @@ static int _test_opt_process(int val, const char *optarg, int remote) /* Called from both srun and slurmd */ int slurm_spank_init(spank_t sp, int ac, char **av) { + if (spank_option_register(sp, spank_options_reg) != ESPANK_SUCCESS) + slurm_error("spank_option_register error"); if (spank_remote(sp) && (ac == 1)) opt_out_file = strdup(av[0]); diff --git a/testsuite/expect/test7.3 b/testsuite/expect/test7.3 index 86fe053a9..07b72d80a 100755 --- a/testsuite/expect/test7.3 +++ b/testsuite/expect/test7.3 @@ -57,7 +57,7 @@ send_user "slurm_dir is $slurm_dir\n" if {[test_aix]} { send_user "$bin_cc ${test_prog}.c -Wl,-brtl -g -pthread -o ${test_prog} -I${slurm_dir}/include -L${slurm_dir}/lib -lslurm -lntbl\n" exec $bin_cc ${test_prog}.c -Wl,-brtl -g -pthread -o ${test_prog} -I${slurm_dir}/include -L${slurm_dir}/lib -lslurm -lntbl -} elseif [file exists ${slurm_dir}/lib64] { +} elseif [file exists ${slurm_dir}/lib64/libslurm.so] { send_user "$bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib64 -L${slurm_dir}/lib64 -lslurm\n" exec $bin_cc ${test_prog}.c -g -pthread -o ${test_prog} -I${slurm_dir}/include -Wl,--rpath=${slurm_dir}/lib64 -L${slurm_dir}/lib64 -lslurm } else { -- GitLab