From e05a1cf6570c2c777e0245cd688d23b209860f2d Mon Sep 17 00:00:00 2001 From: Mehdi Dogguy <mehdi@debian.org> Date: Mon, 8 Sep 2014 21:34:16 +0200 Subject: [PATCH] Imported Upstream version 2.1.9 --- META | 4 +- NEWS | 39 +- doc/man/man1/sacct.1 | 38 +- doc/man/man1/salloc.1 | 3 +- doc/man/man1/sbatch.1 | 3 +- doc/man/man1/scancel.1 | 6 + doc/man/man1/sreport.1 | 4 + doc/man/man1/srun.1 | 5 +- doc/man/man5/bluegene.conf.5 | 6 +- etc/slurmdbd.conf.example | 2 +- slurm.spec | 6 +- src/common/job_resources.c | 5 +- src/common/node_select.h | 7 +- .../filetxt/filetxt_jobacct_process.c | 4 +- .../mysql/accounting_storage_mysql.c | 30 +- .../mysql/mysql_jobacct_process.c | 13 +- .../pgsql/pgsql_jobacct_process.c | 8 +- .../jobacct_gather/aix/jobacct_gather_aix.c | 2 +- .../linux/jobacct_gather_linux.c | 30 +- .../multifactor/priority_multifactor.c | 2 +- src/plugins/proctrack/linuxproc/kill_tree.c | 8 +- src/plugins/sched/backfill/backfill.c | 2 +- .../select/bluegene/plugin/bg_job_place.c | 72 ++-- src/plugins/select/bluegene/plugin/bluegene.c | 3 +- src/plugins/select/linear/select_linear.c | 13 +- src/sacct/options.c | 4 + src/sacct/print.c | 16 +- src/sacct/sacct.h | 4 +- src/sacctmgr/sacctmgr.c | 12 +- src/salloc/opt.c | 5 + src/sbatch/opt.c | 5 + src/slurmctld/controller.c | 9 +- src/slurmctld/job_mgr.c | 43 -- src/slurmctld/job_scheduler.c | 8 +- src/slurmctld/node_mgr.c | 22 +- src/slurmctld/node_scheduler.c | 48 +-- src/slurmctld/slurmctld.h | 7 - src/slurmd/slurmd/req.c | 11 +- src/slurmd/slurmstepd/mgr.c | 3 +- src/sreport/sreport.c | 25 ++ src/srun/allocate.c | 1 + src/sstat/print.c | 6 +- src/sstat/sstat.c | 13 +- src/sview/common.c | 83 +++- src/sview/sview.c | 12 +- src/sview/sview.h | 2 + testsuite/expect/Makefile.am | 1 + testsuite/expect/README | 2 + testsuite/expect/globals | 31 ++ testsuite/expect/test21.21 | 243 +++++++++++- testsuite/expect/test21.27 | 2 +- testsuite/expect/test21.28 | 366 ++++++++++++++++++ 52 files changed, 1020 insertions(+), 279 deletions(-) create mode 100755 testsuite/expect/test21.28 diff --git a/META b/META index 3ef94ea78..aef01630c 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 2 Meta: 1 - Micro: 8 + Micro: 9 Minor: 1 Name: slurm Release: 1 Release_tags: dist - Version: 2.1.8 + Version: 2.1.9 diff --git a/NEWS b/NEWS index 85c97fe69..6e070ec89 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,43 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 2.1.9 +======================== + -- In select/linear - Fix logic to prevent over-subscribing memory with shared + nodes (Shared=YES or Shared=FORCE). + -- Fix for handling -N and --ntasks-per-node without specifying -n with + salloc and sbatch. + -- Fix jobacct_gather/linux if not polling on tasks to give tasks time to + start before doing initial gather. + -- When changing priority with the multifactor plugin we make sure we update + the last_job_update variable. + -- Fixed sview for gtk < 2.10 to display correct debug level at first. + -- Fixed sview to not select too fast when using a mouse right click. + -- Fixed sacct to display correct timelimits for jobs from accounting. + -- Fixed sacct when running as root by default query all users as documented. + -- In proctrack/linuxproc, skip over files in /proc that are not really user + processes (e.g. "/proc/bus"). + -- Fix documentation bug for slurmdbd.conf + -- Fix slurmctld to update qos preempt list without restart. 
+ -- Fix bug in select/cons_res that in some cases would prevent a preempting job + from using resources already allocated to a preemptable running job. + -- Fix for sreport in interactive mode to honor parsable/2 options. + -- Fixed minor bugs in sacct and sstat commands. + -- BLUEGENE - Fixed issue so that if the slurmd becomes unresponsive and you have + blocks in an error state, accounting is correct when the slurmd comes + back up. + -- Corrected documentation for -n option in srun/salloc/sbatch. + -- BLUEGENE - when running a willrun test along with preemption, the bluegene + plugin now does the correct thing. + -- Fix possible memory corruption issue which can cause slurmctld to abort. + -- BLUEGENE - fixed small memory leak when setting up env. + -- Fixed deadlock if using accounting and cluster changes size in the + database. This can happen if you mistakenly have multiple primary + slurmctld's running for a single cluster, which should rarely if ever + happen. + -- Fixed sacct -c option. + -- Critical bug fix in sched/backfill plugin that caused memory corruption. + * Changes in SLURM 2.1.8 ======================== -- Update BUILD_NOTES for AIX and bgp systems on how to get sview to @@ -5037,4 +5074,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. -$Id: NEWS 20218 2010-05-07 21:29:56Z jette $ +$Id: NEWS 20444 2010-06-08 22:20:32Z jette $ diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index fd452fc79..8e0d41d64 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -625,25 +625,26 @@ was submitted. The format of the output is identical to that of the end field. How long the job was suspended for. .TP -\f3systemcpu\fP -The amount of system CPU time. (If job was running on multiple cpus this -is a combination of all the times so this number could be much larger -than the elapsed time.) -The format of the output is identical to that of the -\f3elapsed\fP -field. +\f3SystemCPU\fP +The amount of system CPU time used by the job or job step. The format +of the output is identical to that of the \f3elapsed\fP field. + +NOTE: SystemCPU provides a measure of the task's parent process and +does not include CPU time of child processes. .TP \f3timelimit\fP What the timelimit was/is for the job. .TP -\f3totalcpu\fP -The total amount CPU time actually used by the job, not just -accounted for (which most likely is a higher number). (If job was -running on multiple cpus this is a combination of all the times so -this number could be much larger than the elapsed time.) The format of -the output is identical to that of the elapsed field. +\f3TotalCPU\fP +The sum of the SystemCPU and UserCPU time used by the job or job step. +The total CPU time of the job may exceed the job's elapsed time for +jobs that include multiple job steps. The format of the output is +identical to that of the \f3elapsed\fP field. + +NOTE: TotalCPU provides a measure of the task's parent process and +does not include CPU time of child processes. .TP \f3uid\fP @@ -654,11 +655,12 @@ The user identifier of the user who ran the job. The user name of the user who ran the job. .TP -\f3usercpu\fP -The amount of user CPU time. (If job was running on multiple cpus -this is a combination of all the times so this number could be -much larger than the elapsed time.) The format of the output is -identical to that of the elapsed field. +\f3UserCPU\fP +The amount of user CPU time used by the job or job step.
The format +of the output is identical to that of the \f3elapsed\fP field. + +NOTE: UserCPU provides a measure of the task's parent process and does +not include CPU time of child processes. .TP \f3wckey\fP diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index beb5d4b2d..414d43818 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -626,8 +626,7 @@ salloc does not launch tasks, it requests an allocation of resources and executed some command. This option advises the SLURM controller that job steps run within this allocation will launch a maximum of \fInumber\fR tasks and sufficient resources are allocated to accomplish this. -The default is one task per socket or core (depending upon the value -of the \fISelectTypeParameters\fR parameter in slurm.conf), but note +The default is one task per node, but note that the \fB\-\-cpus\-per\-task\fR option will change this default. .TP diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 53e286a59..c62b53894 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -655,8 +655,7 @@ sbatch does not launch tasks, it requests an allocation of resources and submits a batch script. This option advises the SLURM controller that job steps run within this allocation will launch a maximum of \fInumber\fR tasks and sufficient resources are allocated to accomplish this. -The default is one task per socket or core (depending upon the value -of the \fISelectTypeParameters\fR parameter in slurm.conf), but note +The default is one task per node, but note that the \fB\-\-cpus\-per\-task\fR option will change this default. .TP diff --git a/doc/man/man1/scancel.1 b/doc/man/man1/scancel.1 index ce3e2b6cf..4c6169fde 100644 --- a/doc/man/man1/scancel.1 +++ b/doc/man/man1/scancel.1 @@ -173,6 +173,12 @@ If a signal value of "KILL" (the default value) is to be sent to an entire job, this will result in the job's termination and its resource allocation being released. .LP +Specifying no \-\-signal option will send a SIGTERM and +wait the KillWait duration as defined in the slurm.conf file before sending the +SIGKILL giving time for the running job/step(s) to clean up. To +immediately kill a job, you can specify \-\-signal=KILL which will +bypass the SIGTERM. +.LP Cancelling a job step will not result in a job being terminated. The job must be cancelled to release a resource allocation. diff --git a/doc/man/man1/sreport.1 b/doc/man/man1/sreport.1 index 3ec9547ed..866fde6b0 100644 --- a/doc/man/man1/sreport.1 +++ b/doc/man/man1/sreport.1 @@ -61,6 +61,10 @@ Identical to the \fBquit\fR command. \fBhelp\fP Display a description of sreport options and commands. +.TP +\fBnonparsable\fP +Return output to normal after parsable or parsable2 has been set. + .TP \fBparsable\fP Output will be | delimited with an ending '|'. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 8fd76ecc5..9524159ef 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -728,8 +728,7 @@ and without delaying the initiation of the job. \fB\-n\fR, \fB\-\-ntasks\fR=<\fInumber\fR> Specify the number of tasks to run. Request that \fBsrun\fR allocate resources for \fIntasks\fR tasks. -The default is one task per socket or core (depending upon the value -of the \fISelectTypeParameters\fR parameter in slurm.conf), but note +The default is one task per node, but note that the \fB\-\-cpus\-per\-task\fR option will change this default. .TP @@ -921,7 +920,7 @@ This option may be used to spread several job steps out among the nodes of the current job. 
If \fB\-r\fR is used, the current job step will begin at node \fIn\fR of the allocated nodelist, where the first node is considered node 0. The \fB\-r\fR option is not -permitted along with \fB\-w\fR or \fB\-x\fR, and will be silently +permitted along with \fB\-w\fR or \fB\-x\fR, and will be ignored when not running within a prior allocation (i.e. when SLURM_JOB_ID is not set). The default for \fIn\fR is 0. If the value of \fB\-\-nodes\fR exceeds the number of nodes identified diff --git a/doc/man/man5/bluegene.conf.5 b/doc/man/man5/bluegene.conf.5 index 1a016c205..9853319ee 100644 --- a/doc/man/man5/bluegene.conf.5 +++ b/doc/man/man5/bluegene.conf.5 @@ -13,9 +13,9 @@ Parameter names are case insensitive. Any text following a "#" in the configuration file is treated as a comment through the end of that line. The size of each line in the file is limited to 1024 characters. -Changes to the configuration file take effect upon restart of -SLURM daemons, daemon receipt of the SIGHUP signal, or execution -of the command "scontrol reconfigure" unless otherwise noted. +Changes to the configuration file take only effect upon restart of +the slurmctld daemon. "scontrol reconfigure" does nothing with this file. +Changes will only take place after a restart of the controller. .LP There are some differences between Bluegene/L and Bluegene/P in respects to the contents of the bluegene.conf file. diff --git a/etc/slurmdbd.conf.example b/etc/slurmdbd.conf.example index 3d609b084..6518d6423 100644 --- a/etc/slurmdbd.conf.example +++ b/etc/slurmdbd.conf.example @@ -33,7 +33,7 @@ PidFile=/var/run/slurmdbd.pid StorageType=accounting_storage/mysql #StorageHost=localhost #StoragePort=1234 -StoragePassword=password +StoragePass=password StorageUser=slurm #StorageLoc=slurm_acct_db diff --git a/slurm.spec b/slurm.spec index d03fd7ce5..40eb41123 100644 --- a/slurm.spec +++ b/slurm.spec @@ -83,14 +83,14 @@ %endif Name: slurm -Version: 2.1.8 +Version: 2.1.9 Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-2.1.8.tar.bz2 +Source: slurm-2.1.9.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -352,7 +352,7 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart ############################################################################# %prep -%setup -n slurm-2.1.8 +%setup -n slurm-2.1.9 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ diff --git a/src/common/job_resources.c b/src/common/job_resources.c index ba73b6e09..4a8b81698 100644 --- a/src/common/job_resources.c +++ b/src/common/job_resources.c @@ -224,10 +224,9 @@ extern void reset_node_bitmap(job_resources_t *job_resrcs_ptr, { if (job_resrcs_ptr) { if (job_resrcs_ptr->node_bitmap) - bit_free(job_resrcs_ptr->node_bitmap); + FREE_NULL_BITMAP(job_resrcs_ptr->node_bitmap); if (new_node_bitmap) { - job_resrcs_ptr->node_bitmap = - bit_copy(new_node_bitmap); + job_resrcs_ptr->node_bitmap = bit_copy(new_node_bitmap); job_resrcs_ptr->nhosts = bit_set_count(new_node_bitmap); } else job_resrcs_ptr->nhosts = 0; diff --git a/src/common/node_select.h b/src/common/node_select.h index 378ad7ed0..0ea1fcbc1 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -184,6 +184,7 @@ extern int select_g_alter_node_cnt (enum select_node_cnt type, void *data); #define SELECT_MODE_WILL_RUN 0x0002 #define SELECT_MODE_PREEMPT_FLAG 0x0100 +#define SELECT_MODE_CHECK_FULL 0x0200 #define 
SELECT_IS_MODE_RUN_NOW(_X) \ ((_X & SELECT_MODE_BASE) == SELECT_MODE_RUN_NOW) @@ -197,9 +198,11 @@ extern int select_g_alter_node_cnt (enum select_node_cnt type, void *data); #define SELECT_IS_PREEMPT_SET(_X) \ (_X & SELECT_MODE_PREEMPT_FLAG) +#define SELECT_IS_CHECK_FULL_SET(_X) \ + (_X & SELECT_MODE_CHECK_FULL) + #define SELECT_IS_TEST(_X) \ - ((SELECT_IS_MODE_TEST_ONLY(_X) || SELECT_IS_MODE_WILL_RUN(_X)) \ - && !SELECT_IS_PREEMPT_SET(_X)) + (SELECT_IS_MODE_TEST_ONLY(_X) || SELECT_IS_MODE_WILL_RUN(_X)) #define SELECT_IS_PREEMPTABLE_TEST(_X) \ ((SELECT_IS_MODE_TEST_ONLY(_X) || SELECT_IS_MODE_WILL_RUN(_X)) \ diff --git a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c index ebd380e45..73bf93005 100644 --- a/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c +++ b/src/plugins/accounting_storage/filetxt/filetxt_jobacct_process.c @@ -705,10 +705,8 @@ static int _parse_line(char *f[], void **data, int len) (*job)->nodes = xstrdup(f[F_NODES]); for (i=0; (*job)->nodes[i]; i++) { /* discard trailing <CR> */ - if (isspace((*job)->nodes[i])) { + if (isspace((*job)->nodes[i])) (*job)->nodes[i] = '\0'; - info("got here"); - } } if (!strcmp((*job)->nodes, "(null)")) { xfree((*job)->nodes); diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 52eac1bcb..c47073e7b 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -6098,9 +6098,10 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, ret_list = list_create(slurm_destroy_char); while((row = mysql_fetch_row(result))) { acct_qos_rec_t *qos_rec = NULL; + int id = atoi(row[2]); + if(preempt_bitstr) { - if(_preemption_loop(mysql_conn, - atoi(row[2]), preempt_bitstr)) + if(_preemption_loop(mysql_conn, id, preempt_bitstr)) break; } object = xstrdup(row[0]); @@ -6113,6 +6114,7 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, } qos_rec = xmalloc(sizeof(acct_qos_rec_t)); + qos_rec->id = id; qos_rec->name = xstrdup(object); qos_rec->grp_cpus = qos->grp_cpus; @@ -6136,27 +6138,29 @@ extern List acct_storage_p_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid, list_iterator_create(qos->preempt_list); char *new_preempt = NULL; - qos->preempt_bitstr = bit_alloc(g_qos_count); + qos_rec->preempt_bitstr = bit_alloc(g_qos_count); if(row[1] && row[1][0]) - bit_unfmt(qos->preempt_bitstr, row[1]+1); + bit_unfmt(qos_rec->preempt_bitstr, row[1]+1); while((new_preempt = list_next(new_preempt_itr))) { bool cleared = 0; if(new_preempt[0] == '-') { - bit_clear(qos->preempt_bitstr, + bit_clear(qos_rec->preempt_bitstr, atoi(new_preempt+1)); } else if(new_preempt[0] == '+') { - bit_set(qos->preempt_bitstr, + bit_set(qos_rec->preempt_bitstr, atoi(new_preempt+1)); } else { if(!cleared) { cleared = 1; - bit_nclear(qos->preempt_bitstr, - 0, - bit_size(qos->preempt_bitstr)-1); + bit_nclear( + qos_rec->preempt_bitstr, + 0, + bit_size(qos_rec-> + preempt_bitstr)-1); } - bit_set(qos->preempt_bitstr, + bit_set(qos_rec->preempt_bitstr, atoi(new_preempt)); } } @@ -10618,7 +10622,11 @@ extern int jobacct_storage_p_job_start(mysql_conn_t *mysql_conn, global_last_rollup = check_time; slurm_mutex_unlock(&rollup_lock); - + /* If the times here are later than the daily_rollup + or monthly rollup it isn't a big deal since they + are always shrunk down to 
the beginning of each + time period. + */ query = xstrdup_printf("update %s set hourly_rollup=%d, " "daily_rollup=%d, monthly_rollup=%d", last_ran_table, check_time, diff --git a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c index 0280748e0..65f591106 100644 --- a/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c +++ b/src/plugins/accounting_storage/mysql/mysql_jobacct_process.c @@ -898,6 +898,7 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, "t1.nodelist", "t1.node_inx", "t1.kill_requid", + "t1.timelimit", "t1.qos", "t2.user", "t2.cluster", @@ -934,6 +935,7 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, JOB_REQ_NODELIST, JOB_REQ_NODE_INX, JOB_REQ_KILL_REQUID, + JOB_REQ_TIMELIMIT, JOB_REQ_QOS, JOB_REQ_USER_NAME, JOB_REQ_CLUSTER, @@ -1326,6 +1328,7 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, job->priority = atoi(row[JOB_REQ_PRIORITY]); job->req_cpus = atoi(row[JOB_REQ_REQ_CPUS]); job->requid = atoi(row[JOB_REQ_KILL_REQUID]); + job->timelimit = atoi(row[JOB_REQ_TIMELIMIT]); job->qos = atoi(row[JOB_REQ_QOS]); job->show_full = 1; @@ -1398,6 +1401,8 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, continue; step = create_jobacct_step_rec(); + step->tot_cpu_sec = 0; + step->tot_cpu_usec = 0; step->job_ptr = job; if(!job->first_step_ptr) job->first_step_ptr = step; @@ -1455,11 +1460,9 @@ extern List mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn, uid_t uid, atoi(step_row[STEP_REQ_USER_USEC]); step->sys_cpu_sec = atoi(step_row[STEP_REQ_SYS_SEC]); step->sys_cpu_usec = atoi(step_row[STEP_REQ_SYS_USEC]); - job->tot_cpu_sec += - step->tot_cpu_sec += + step->tot_cpu_sec += step->user_cpu_sec + step->sys_cpu_sec; - job->tot_cpu_usec += - step->tot_cpu_usec += + step->tot_cpu_usec += step->user_cpu_usec + step->sys_cpu_usec; step->sacct.max_vsize = atoi(step_row[STEP_REQ_MAX_VSIZE]); @@ -1582,6 +1585,7 @@ extern int mysql_jobacct_process_archive(mysql_conn_t *mysql_conn, "nodelist", "node_inx", "kill_requid", + "timelimit", "qos" }; @@ -1676,6 +1680,7 @@ extern int mysql_jobacct_process_archive(mysql_conn_t *mysql_conn, JOB_REQ_NODELIST, JOB_REQ_NODE_INX, JOB_REQ_KILL_REQUID, + JOB_REQ_TIMELIMIT, JOB_REQ_QOS, JOB_REQ_COUNT }; diff --git a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c index 033d517ef..3069680f8 100644 --- a/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c +++ b/src/plugins/accounting_storage/pgsql/pgsql_jobacct_process.c @@ -593,6 +593,8 @@ no_cond: xfree(query); for(j = 0; j < PQntuples(step_result); j++) { step = create_jobacct_step_rec(); + step->tot_cpu_sec = 0; + step->tot_cpu_usec = 0; step->job_ptr = job; if(!job->first_step_ptr) job->first_step_ptr = step; @@ -654,11 +656,9 @@ no_cond: PQgetvalue(step_result, j, STEP_REQ_SYS_SEC)); step->sys_cpu_usec = atoi( PQgetvalue(step_result, j, STEP_REQ_SYS_USEC)); - job->tot_cpu_sec += - step->tot_cpu_sec += + step->tot_cpu_sec += step->user_cpu_sec + step->sys_cpu_sec; - job->tot_cpu_usec += - step->tot_cpu_usec += + step->tot_cpu_usec += step->user_cpu_usec + step->sys_cpu_usec; step->sacct.max_vsize = atoi( PQgetvalue(step_result, j, diff --git a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c index 51908c692..32a1e0304 100644 --- 
a/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c +++ b/src/plugins/jobacct_gather/aix/jobacct_gather_aix.c @@ -346,7 +346,7 @@ static void _acct_kill_job(void) static void *_watch_tasks(void *arg) { - while(!jobacct_shutdown) { /* Do this until shutdown is requested */ + while(!jobacct_shutdown) { /* Do this until shutdown is requested */ if(!jobacct_suspended) { _get_process_data(); /* Update the data */ } diff --git a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c index 823bfe371..c4b42643b 100644 --- a/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c +++ b/src/plugins/jobacct_gather/linux/jobacct_gather_linux.c @@ -336,9 +336,10 @@ static void _get_process_data() { jobacct->min_cpu = jobacct->tot_cpu = MAX(jobacct->min_cpu, (prec->usec + prec->ssec)); - debug2("%d mem size %u %u time %u", - jobacct->pid, jobacct->max_rss, - jobacct->max_vsize, jobacct->tot_cpu); + debug2("%d mem size %u %u time %u(%u+%u)", + jobacct->pid, jobacct->max_rss, + jobacct->max_vsize, jobacct->tot_cpu, + prec->usec, prec->ssec); break; } } @@ -436,8 +437,8 @@ static int _get_process_data_line(int in, prec_t *prec) { prec->pages = majflt; prec->usec = utime; prec->ssec = stime; - prec->vsize = vsize / 1024; /* convert from bytes to KB */ - prec->rss = rss * getpagesize() / 1024; /* convert from pages to KB */ + prec->vsize = vsize / 1024; /* convert from bytes to KB */ + prec->rss = rss * getpagesize() / 1024;/* convert from pages to KB */ return 1; } @@ -622,7 +623,8 @@ extern void jobacct_gather_p_change_poll(uint16_t frequency) pthread_t _watch_tasks_thread_id; /* create polling thread */ slurm_attr_init(&attr); - if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) + if (pthread_attr_setdetachstate(&attr, + PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); if (pthread_create(&_watch_tasks_thread_id, &attr, @@ -682,11 +684,21 @@ extern int jobacct_gather_p_add_task(pid_t pid, jobacct_id_t *jobacct_id) extern struct jobacctinfo *jobacct_gather_p_stat_task(pid_t pid) { - _get_process_data(); - if(pid) + if(pid) { + _get_process_data(); return jobacct_common_stat_task(pid, task_list); - else + } else { + /* In this situation, we are just trying to get a + * basis of information since we are not pollng. So + * we will give a chance for processes to spawn before we + * gather information. 
This should largely eliminate the + * the chance of having /proc open when the tasks are + * spawned, which would prevent a valid checkpoint/restart + * with some systems */ + _task_sleep(1); + _get_process_data(); return NULL; + } } extern struct jobacctinfo *jobacct_gather_p_remove_task(pid_t pid) diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c index aa3bda1ae..2d1954259 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.c +++ b/src/plugins/priority/multifactor/priority_multifactor.c @@ -816,7 +816,7 @@ static void *_decay_thread(void *no_data) job_ptr->priority = _get_priority_internal(start_time, job_ptr); - + last_job_update = time(NULL); debug2("priority for job %u is now %u", job_ptr->job_id, job_ptr->priority); } diff --git a/src/plugins/proctrack/linuxproc/kill_tree.c b/src/plugins/proctrack/linuxproc/kill_tree.c index 41836c113..4f32c3827 100644 --- a/src/plugins/proctrack/linuxproc/kill_tree.c +++ b/src/plugins/proctrack/linuxproc/kill_tree.c @@ -144,7 +144,7 @@ static int get_myname(char *s) return 0; } -static xppid_t **_build_hashtbl() +static xppid_t **_build_hashtbl(void) { DIR *dir; struct dirent *de; @@ -165,11 +165,13 @@ static xppid_t **_build_hashtbl() while ((de = readdir(dir)) != NULL) { num = de->d_name; + if ((num[0] < '0') || (num[0] > '9')) + continue; ret_l = strtol(num, &endptr, 10); - if(errno == ERANGE) + if(errno == ERANGE) { error("couldn't do a strtol on str %s(%d): %m", num, ret_l); - + } if (endptr == NULL || *endptr != 0) continue; sprintf(path, "/proc/%s/stat", num); diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index af28f7821..d936e648a 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -563,7 +563,7 @@ static void _attempt_backfill(void) continue; } - if (node_space_recs == max_backfill_job_cnt) { + if (node_space_recs >= max_backfill_job_cnt) { /* Already have too many jobs to deal with */ break; } diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index e8139e4a0..8ff04f891 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -315,7 +315,7 @@ static bg_record_t *_find_matching_block(List block_list, } else if((bg_record->job_running != NO_JOB_RUNNING) && (bg_record->job_running != job_ptr->job_id) && (bg_conf->layout_mode == LAYOUT_DYNAMIC - || (!SELECT_IS_TEST(query_mode) + || (SELECT_IS_MODE_RUN_NOW(query_mode) && bg_conf->layout_mode != LAYOUT_DYNAMIC))) { debug("block %s in use by %s job %d", bg_record->bg_block_id, @@ -520,7 +520,7 @@ static int _check_for_booted_overlapping_blocks( if(is_test && overlapped_list && found_record->job_ptr && bg_record->job_running == NO_JOB_RUNNING) { - debug2("found over lapping block %s " + debug2("found overlapping block %s " "overlapped %s with job %u", found_record->bg_block_id, bg_record->bg_block_id, @@ -666,7 +666,13 @@ static int _dynamically_request(List block_list, int *blocks_added, debug2("going to create %d", request->size); list_of_lists = list_create(NULL); - if(user_req_nodes) + /* If preempt is set and we are checking full system it means + we altered the block list so only look at it. 
+ */ + if(SELECT_IS_PREEMPT_SET(query_mode) + && SELECT_IS_CHECK_FULL_SET(query_mode)) { + list_append(list_of_lists, block_list); + } else if(user_req_nodes) list_append(list_of_lists, bg_lists->job_running); else { list_append(list_of_lists, block_list); @@ -699,8 +705,7 @@ static int _dynamically_request(List block_list, int *blocks_added, while((bg_record = list_pop(new_blocks))) { if(block_exist_in_list(block_list, bg_record)) destroy_bg_record(bg_record); - else if(SELECT_IS_PREEMPTABLE_TEST( - query_mode)) { + else if(SELECT_IS_TEST(query_mode)) { /* Here we don't really want to create the block if we are testing. @@ -893,7 +898,7 @@ static int _find_best_block_match(List block_list, * works we will have can look and see the earliest * the job can start. This doesn't apply to Dynamic mode. */ - if(is_test + if(is_test && SELECT_IS_CHECK_FULL_SET(query_mode) && bg_conf->layout_mode != LAYOUT_DYNAMIC) overlapped_list = list_create(NULL); @@ -906,8 +911,7 @@ static int _find_best_block_match(List block_list, overlap_check, overlapped_list, query_mode); - if(!bg_record && is_test - && bg_conf->layout_mode != LAYOUT_DYNAMIC + if(!bg_record && overlapped_list && list_count(overlapped_list)) { ListIterator itr = list_iterator_create(overlapped_list); @@ -921,7 +925,7 @@ static int _find_best_block_match(List block_list, list_iterator_destroy(itr); } - if(is_test && bg_conf->layout_mode != LAYOUT_DYNAMIC) + if(overlapped_list) list_destroy(overlapped_list); /* set the bitmap and do other allocation activities */ @@ -995,8 +999,10 @@ static int _find_best_block_match(List block_list, continue; } - - if(is_test) { + /* Only look at the full system if we aren't going to + preempt jobs later and look. + */ + if(is_test && SELECT_IS_CHECK_FULL_SET(query_mode)) { List new_blocks = NULL; List job_list = list_create(NULL); ListIterator itr = NULL; @@ -1294,7 +1300,8 @@ static void _build_select_struct(struct job_record *job_ptr, } } -static List _get_preemptables(bg_record_t *bg_record, List preempt_jobs) +static List _get_preemptables(uint16_t query_mode, bg_record_t *bg_record, + List preempt_jobs) { List preempt = NULL; ListIterator itr; @@ -1321,10 +1328,10 @@ static List _get_preemptables(bg_record_t *bg_record, List preempt_jobs) break; } if(job_ptr) { - list_append(preempt, job_ptr); + list_push(preempt, job_ptr); /* info("going to preempt %u running on %s", */ /* job_ptr->job_id, found_record->bg_block_id); */ - } else { + } else if(SELECT_IS_MODE_RUN_NOW(query_mode)) { error("Job %u running on block %s " "wasn't in the preempt list, but needs to be " "preempted for queried job to run on block %s", @@ -1422,6 +1429,8 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, if (preemptee_candidates && preemptee_job_list && list_count(preemptee_candidates)) local_mode |= SELECT_MODE_PREEMPT_FLAG; + else + local_mode |= SELECT_MODE_CHECK_FULL; if(bg_conf->layout_mode == LAYOUT_DYNAMIC) slurm_mutex_lock(&create_dynamic_mutex); @@ -1487,8 +1496,8 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, block_list = copy_bg_list(bg_lists->main); slurm_mutex_unlock(&block_state_mutex); - /* just remove the preemptable jobs now since we are treating - this as a run now deal */ + /* First look at the empty space, and then remove the + preemptable jobs and try again. 
*/ preempt: list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc); @@ -1538,18 +1547,6 @@ preempt: job_ptr->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &bg_record->node_cnt); - - /* This is a fake record so we need to - * destroy it after we get the info from - * it. if it was just testing then - * we added this record to the - * block_list. If this is the case - * it will be set below, but set - * blocks_added to 0 since we don't - * want to sync this with the list. */ - if(!blocks_added) - destroy_bg_record(bg_record); - blocks_added = 0; } else { if((bg_record->ionodes) && (job_ptr->part_ptr->max_share <= 1)) @@ -1596,12 +1593,29 @@ preempt: if(*preemptee_job_list) list_destroy(*preemptee_job_list); *preemptee_job_list = _get_preemptables( - bg_record, preemptee_candidates); + local_mode, bg_record, + preemptee_candidates); + } + if(!bg_record->bg_block_id) { + /* This is a fake record so we need to + * destroy it after we get the info from + * it. If it was just testing then + * we added this record to the + * block_list. If this is the case + * it will be handled if se sync the + * lists. But we don't want to do + * that so we will set blocks_added to + * 0 so it doesn't happen. */ + if(!blocks_added) + destroy_bg_record(bg_record); + blocks_added = 0; } } else { error("we got a success, but no block back"); } } else if(!preempt_done && SELECT_IS_PREEMPT_SET(local_mode)) { + debug2("doing preemption"); + local_mode |= SELECT_MODE_CHECK_FULL; avail_cpus += _remove_preemptables( block_list, preemptee_candidates); preempt_done = true; diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index 93f5ddfc6..922909d51 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.c - blue gene node configuration processing module. * - * $Id: bluegene.c 19755 2010-03-16 19:15:43Z da $ + * $Id: bluegene.c 20411 2010-06-03 23:44:06Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -164,7 +164,6 @@ extern bool blocks_overlap(bg_record_t *rec_a, bg_record_t *rec_b) return true; } - if (!bit_overlap(rec_a->bitmap, rec_b->bitmap)) return false; diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 69e76d730..39cd508f1 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -89,15 +89,6 @@ int node_record_count; time_t last_node_update; struct switch_record *switch_record_table; int switch_record_cnt; -#ifdef HAVE_AIX -/* On AIX dlopen's RTLD_LAZY flag does NOT work, so we must define - * actual functions here in order to load this plugin from anywhere - * other than slurmctld */ -void job_preempt_remove(uint32_t job_id) -{ - ; -} -#endif struct select_nodeinfo { uint16_t magic; /* magic number */ @@ -598,6 +589,10 @@ static int _job_count_bitmap(struct node_cr_record *node_cr_ptr, } else job_mem = job_memory_node; } + if ((alloc_mem + job_mem) > avail_mem) { + bit_clear(jobmap, i); + continue; + } } if (node_cr_ptr[i].exclusive_cnt != 0) { diff --git a/src/sacct/options.c b/src/sacct/options.c index 77d4d41be..bc0eac979 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -988,6 +988,10 @@ void parse_command_line(int argc, char **argv) || job_cond->used_nodes) all_users=1; + /* set all_users for user root if not requesting any */ + if(!job_cond->userid_list && !params.opt_uid) + all_users = 1; + if(all_users) { if(job_cond->userid_list && list_count(job_cond->userid_list)) { list_destroy(job_cond->userid_list); diff --git a/src/sacct/print.c b/src/sacct/print.c index baad19f00..d0be850d1 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -1126,7 +1126,6 @@ void print_fields(type_t type, void *object) case JOBSTEP: tmp_int = step->sys_cpu_sec; tmp_int2 = step->sys_cpu_usec; - break; case JOBCOMP: @@ -1145,10 +1144,18 @@ void print_fields(type_t type, void *object) case PRINT_TIMELIMIT: switch(type) { case JOB: - + if (job->timelimit == INFINITE) + tmp_char = "UNLIMITED"; + else if (job->timelimit == NO_VAL) + tmp_char = "Partition_Limit"; + else if(job->timelimit) { + char tmp1[128]; + mins2time_str(job->timelimit, + tmp1, sizeof(tmp1)); + tmp_char = tmp1; + } break; case JOBSTEP: - break; case JOBCOMP: tmp_char = job_comp->timelimit; @@ -1237,12 +1244,11 @@ void print_fields(type_t type, void *object) switch(type) { case JOB: tmp_int = job->user_cpu_sec; - tmp_int2 = job->tot_cpu_usec; + tmp_int2 = job->user_cpu_usec; break; case JOBSTEP: tmp_int = step->user_cpu_sec; tmp_int2 = step->user_cpu_usec; - break; case JOBCOMP: diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index 0bec9148d..b4cb40da0 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -68,10 +68,10 @@ #define BRIEF_FIELDS "jobid,state,exitcode" #define BRIEF_COMP_FIELDS "jobid,uid,state" #define DEFAULT_FIELDS "jobid,jobname,partition,account,alloccpus,state,exitcode" -#define DEFAULT_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodes,state,end" +#define DEFAULT_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,end" #define LONG_FIELDS "jobid,jobname,partition,maxvmsize,maxvmsizenode,maxvmsizetask,avevmsize,maxrss,maxrssnode,maxrsstask,averss,maxpages,maxpagesnode,maxpagestask,avepages,mincpu,mincpunode,mincputask,avecpu,ntasks,alloccpus,elapsed,state,exitcode" -#define LONG_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodes,state,start,end,timelimit" +#define LONG_COMP_FIELDS "jobid,uid,jobname,partition,nnodes,nodelist,state,start,end,timelimit" #define 
BUFFER_SIZE 4096 #define STATE_COUNT 10 diff --git a/src/sacctmgr/sacctmgr.c b/src/sacctmgr/sacctmgr.c index 0489d0fd0..81eaab60c 100644 --- a/src/sacctmgr/sacctmgr.c +++ b/src/sacctmgr/sacctmgr.c @@ -814,14 +814,14 @@ sacctmgr [<OPTION>] [<COMMAND>] \n\ WithCoordinators, WithRawQOS, and WOPLimits \n\ add account - Clusters=, Description=, Fairshare=, \n\ GrpCPUMins=, GrpCPUs=, GrpJobs=, GrpNodes=, \n\ - GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxJobs=,\n\ - MaxNodes=, MaxSubmitJobs=, MaxWall=, Names=, \n\ - Organization=, Parent=, and QosLevel \n\ + GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxCPUs=,\n\ + MaxJobs=, MaxNodes=, MaxSubmitJobs=, MaxWall=, \n\ + Names=, Organization=, Parent=, and QosLevel= \n\ modify account - (set options) Description=, Fairshare=, \n\ GrpCPUMins=, GrpCPUs=, GrpJobs=, GrpNodes=, \n\ - GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxJobs=,\n\ - MaxNodes=, MaxSubmitJobs=, MaxWall=, Names=, \n\ - Organization=, Parent=, and QosLevel= \n\ + GrpSubmitJob=, GrpWall=, MaxCPUMins=, MaxCPUs=,\n\ + MaxJobs=, MaxNodes=, MaxSubmitJobs=, MaxWall=, \n\ + Names=, Organization=, Parent=, and QosLevel= \n\ (where options) Clusters=, Descriptions=, \n\ Names=, Organizations=, Parent=, and QosLevel= \n\ delete account - Clusters=, Descriptions=, Names=, \n\ diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 3253ec573..a45c5c0a5 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -1196,6 +1196,11 @@ static bool _opt_verify(void) verified = false; } + if ((opt.ntasks_per_node > -1) && (!opt.nprocs_set)) { + opt.nprocs = opt.min_nodes * opt.ntasks_per_node; + opt.nprocs_set = 1; + } + if (opt.mincpus < opt.cpus_per_task) opt.mincpus = opt.cpus_per_task; diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 8c963d051..1f9155a03 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1981,6 +1981,11 @@ static bool _opt_verify(void) _fullpath(&opt.ifname, opt.cwd); _fullpath(&opt.ofname, opt.cwd); + if ((opt.ntasks_per_node > 0) && (!opt.nprocs_set)) { + opt.nprocs = opt.min_nodes * opt.ntasks_per_node; + opt.nprocs_set = 1; + } + if (opt.mincpus < opt.cpus_per_task) opt.mincpus = opt.cpus_per_task; diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 408df3f46..b05e78e7a 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1019,7 +1019,10 @@ static int _accounting_cluster_ready() int procs = 0; bitstr_t *total_node_bitmap = NULL; char *cluster_nodes = NULL; + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; + lock_slurmctld(node_read_lock); node_ptr = node_record_table_ptr; for (i = 0; i < node_record_count; i++, node_ptr++) { if (node_ptr->name == '\0') @@ -1047,6 +1050,7 @@ static int _accounting_cluster_ready() bit_nset(total_node_bitmap, 0, node_record_count-1); cluster_nodes = bitmap2node_name(total_node_bitmap); FREE_NULL_BITMAP(total_node_bitmap); + unlock_slurmctld(node_read_lock); rc = clusteracct_storage_g_cluster_procs(acct_db_conn, slurmctld_cluster_name, @@ -1160,9 +1164,6 @@ static void *_slurmctld_background(void *no_data) * (Might kill jobs on nodes set DOWN) */ slurmctld_lock_t node_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; - /* Locks: Read node */ - slurmctld_lock_t node_read_lock = { - NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; /* Locks: Write node */ slurmctld_lock_t node_write_lock2 = { NO_LOCK, NO_LOCK, WRITE_LOCK, NO_LOCK }; @@ -1335,9 +1336,7 @@ static void *_slurmctld_background(void *no_data) /* Report current node state to account for added * or reconfigured nodes */ 
last_node_acct = now; - lock_slurmctld(node_read_lock); _accounting_cluster_ready(); - unlock_slurmctld(node_read_lock); } /* Reassert this machine as the primary controller. diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index f7c373206..634119c59 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -8079,46 +8079,3 @@ _read_job_ckpt_file(char *ckpt_file, int *size_ptr) *size_ptr = data_size; return data; } - -/* - * Preempt a job using the proper job removal mechanism (checkpoint, requeue). - * Do not use this function for job suspend/resume. This is handled by the - * gang module. - */ -extern void job_preempt_remove(uint32_t job_id) -{ - int rc = SLURM_SUCCESS; - uint16_t preempt_mode = slurm_get_preempt_mode(); - checkpoint_msg_t ckpt_msg; - - preempt_mode &= (~PREEMPT_MODE_GANG); - if (preempt_mode == PREEMPT_MODE_REQUEUE) { - rc = job_requeue(0, job_id, -1); - if (rc == SLURM_SUCCESS) { - info("preempted job %u has been requeued", job_id); - } - } else if (preempt_mode == PREEMPT_MODE_CANCEL) { - (void) job_signal(job_id, SIGKILL, 0, 0); - } else if (preempt_mode == PREEMPT_MODE_CHECKPOINT) { - memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t)); - ckpt_msg.op = CHECK_VACATE; - ckpt_msg.job_id = job_id; - rc = job_checkpoint(&ckpt_msg, 0, -1); - if (rc == SLURM_SUCCESS) { - info("preempted job %u has been checkpointed", job_id); - } - } else { - error("Invalid preempt_mode: %u", preempt_mode); - } - - if (rc != SLURM_SUCCESS) { - rc = job_signal(job_id, SIGKILL, 0, 0); - if (rc == SLURM_SUCCESS) - info("preempted job %u had to be killed", job_id); - else { - info("preempted job %u kill failure %s", - job_id, slurm_strerror(rc)); - } - } -} - diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index a2ffe2c0f..11dfee940 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -509,7 +509,8 @@ extern void sort_job_queue(struct job_queue *job_queue, int job_queue_size) job_queue[i].job_ptr = job_queue[top_prio_inx].job_ptr; job_queue[i].job_priority = job_queue[top_prio_inx]. job_priority; - + job_queue[i].part_priority = job_queue[top_prio_inx]. 
+ part_priority; job_queue[top_prio_inx].job_ptr = tmp_job_ptr; job_queue[top_prio_inx].job_priority = tmp_job_prio; @@ -1130,14 +1131,15 @@ static char **_build_env(struct job_record *job_ptr) #ifdef HAVE_CRAY_XT select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_RESV_ID, &name); + SELECT_JOBDATA_RESV_ID, &name); setenvf(&env, "BASIL_RESERVATION_ID", "%s", name); xfree(name); #endif #ifdef HAVE_BG select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_BLOCK_ID, &name); + SELECT_JOBDATA_BLOCK_ID, &name); setenvf(&my_env, "MPIRUN_PARTITION", "%s", name); + xfree(name); #endif setenvf(&my_env, "SLURM_JOB_ACCOUNT", "%s", job_ptr->account); if (job_ptr->details) { diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 34130d412..ec70dd080 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -1347,11 +1347,6 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) #endif } } else { - uint16_t err_cpus = 0; - select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, - SELECT_NODEDATA_SUBCNT, - NODE_STATE_ERROR, - &err_cpus); if (IS_NODE_UNKNOWN(node_ptr)) { last_node_update = now; reset_job_priority(); @@ -1365,8 +1360,7 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) node_flags; node_ptr->last_idle = now; } - if (!err_cpus - && !IS_NODE_DRAIN(node_ptr) + if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { xfree(node_ptr->reason); clusteracct_storage_g_node_up( @@ -1392,8 +1386,7 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg) reg_msg->node_name); reset_job_priority(); trigger_node_up(node_ptr); - if (!err_cpus - && !IS_NODE_DRAIN(node_ptr) + if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { xfree(node_ptr->reason); clusteracct_storage_g_node_up( @@ -1579,11 +1572,6 @@ extern int validate_nodes_via_front_end( #endif } } else { - uint16_t err_cpus = 0; - select_g_select_nodeinfo_get(node_ptr->select_nodeinfo, - SELECT_NODEDATA_SUBCNT, - NODE_STATE_ERROR, - &err_cpus); if (reg_hostlist) (void) hostlist_push_host( reg_hostlist, node_ptr->name); @@ -1603,8 +1591,7 @@ extern int validate_nodes_via_front_end( node_flags; node_ptr->last_idle = now; } - if (!err_cpus - && !IS_NODE_DRAIN(node_ptr) + if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { xfree(node_ptr->reason); clusteracct_storage_g_node_up( @@ -1626,8 +1613,7 @@ extern int validate_nodes_via_front_end( node_ptr->last_idle = now; } trigger_node_up(node_ptr); - if (!err_cpus - && !IS_NODE_DRAIN(node_ptr) + if (!IS_NODE_DRAIN(node_ptr) && !IS_NODE_FAIL(node_ptr)) { xfree(node_ptr->reason); clusteracct_storage_g_node_up( diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 77052471f..0d8654b08 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -609,14 +609,12 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, bitstr_t *avail_bitmap = NULL, *total_bitmap = NULL; bitstr_t *backup_bitmap = NULL; bitstr_t *possible_bitmap = NULL; - bitstr_t *partially_idle_node_bitmap = NULL; int max_feature, min_feature; bool runable_ever = false; /* Job can ever run */ bool runable_avail = false; /* Job can run with available nodes */ bool tried_sched = false; /* Tried to schedule with avail nodes */ static uint32_t cr_enabled = NO_VAL; bool preempt_flag = false; - select_type_plugin_info_t cr_type = SELECT_TYPE_INFO_NONE; int shared = 0, select_mode; if (test_only) @@ -643,34 +641,14 @@ 
_pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, shared = _resolve_shared_status(job_ptr->details->shared, part_ptr->max_share, cr_enabled); job_ptr->details->shared = shared; + if (cr_enabled) + job_ptr->cr_enabled = cr_enabled; /* CR enabled for this job */ /* If job preemption is enabled, then do NOT limit the set of available * nodes by their current 'sharable' or 'idle' setting */ if (slurm_get_preempt_mode() != PREEMPT_MODE_OFF) preempt_flag = true; - if (cr_enabled) { - /* Determine which nodes might be used by this job based upon - * its ability to share resources */ - job_ptr->cr_enabled = cr_enabled; /* CR enabled for this job */ - - cr_type = (select_type_plugin_info_t) slurmctld_conf. - select_type_param; - - /* Set the partially_idle_node_bitmap to reflect the - * idle and partially idle nodes */ - error_code = select_g_get_info_from_plugin (SELECT_BITMAP, - job_ptr, &partially_idle_node_bitmap); - if (error_code != SLURM_SUCCESS) { - FREE_NULL_BITMAP(partially_idle_node_bitmap); - return error_code; - } - debug3("Job %u shared %d CR type %d num_procs %d nbits %d", - job_ptr->job_id, shared, cr_enabled, cr_type, - job_ptr->num_procs, - bit_set_count(partially_idle_node_bitmap)); - } - if (job_ptr->details->req_node_bitmap) { /* specific nodes required */ /* We have already confirmed that all of these nodes have a * usable configuration and are in the proper partition. @@ -680,7 +658,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, job_ptr->details->req_node_bitmap); } if (total_nodes > max_nodes) { /* exceeds node limit */ - FREE_NULL_BITMAP(partially_idle_node_bitmap); return ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE; } @@ -688,33 +665,20 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, /* Should we check memory availability on these nodes? 
*/ if (!bit_super_set(job_ptr->details->req_node_bitmap, avail_node_bitmap)) { - FREE_NULL_BITMAP(partially_idle_node_bitmap); return ESLURM_NODE_NOT_AVAIL; } - if (partially_idle_node_bitmap) { - if (!bit_super_set(job_ptr->details->req_node_bitmap, - partially_idle_node_bitmap)) { - FREE_NULL_BITMAP(partially_idle_node_bitmap); - return ESLURM_NODES_BUSY; - } - } - if (!preempt_flag) { if (shared) { if (!bit_super_set(job_ptr->details-> req_node_bitmap, share_node_bitmap)) { - FREE_NULL_BITMAP( - partially_idle_node_bitmap); return ESLURM_NODES_BUSY; } } else { if (!bit_super_set(job_ptr->details-> req_node_bitmap, idle_node_bitmap)) { - FREE_NULL_BITMAP( - partially_idle_node_bitmap); return ESLURM_NODES_BUSY; } } @@ -759,11 +723,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, } bit_and(node_set_ptr[i].my_bitmap, avail_node_bitmap); - if (partially_idle_node_bitmap) { - bit_and(node_set_ptr[i].my_bitmap, - partially_idle_node_bitmap); - } - if (!preempt_flag) { if (shared) { bit_and(node_set_ptr[i].my_bitmap, @@ -840,7 +799,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, avail_nodes = 0; break; } - FREE_NULL_BITMAP(partially_idle_node_bitmap); FREE_NULL_BITMAP(total_bitmap); FREE_NULL_BITMAP(possible_bitmap); *select_bitmap = avail_bitmap; @@ -870,7 +828,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, preemptee_job_list); if ((pick_code == SLURM_SUCCESS) && (bit_set_count(avail_bitmap) <= max_nodes)) { - FREE_NULL_BITMAP(partially_idle_node_bitmap); FREE_NULL_BITMAP(total_bitmap); FREE_NULL_BITMAP(possible_bitmap); *select_bitmap = avail_bitmap; @@ -949,7 +906,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, } else { FREE_NULL_BITMAP(possible_bitmap); } - FREE_NULL_BITMAP(partially_idle_node_bitmap); return error_code; } diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index e25764399..191598346 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -860,13 +860,6 @@ extern int job_fail(uint32_t job_id); */ extern int job_node_ready(uint32_t job_id, int *ready); -/* - * Preempt a job using the proper job removal mechanism (checkpoint, requeue). - * Do not use this function for job suspend/resume. This is handled by the - * gang module. 
- */ -extern void job_preempt_remove(uint32_t job_id); - /* * job_restart - Restart a batch job from checkpointed state * diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 1cc41c4a4..29efe3a52 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -1111,11 +1111,11 @@ _rpc_batch_job(slurm_msg_t *msg) */ #ifdef HAVE_BG select_g_select_jobinfo_get(req->select_jobinfo, - SELECT_JOBDATA_BLOCK_ID, &resv_id); + SELECT_JOBDATA_BLOCK_ID, &resv_id); #endif #ifdef HAVE_CRAY_XT select_g_select_jobinfo_get(req->select_jobinfo, - SELECT_JOBDATA_RESV_ID, &resv_id); + SELECT_JOBDATA_RESV_ID, &resv_id); #endif rc = _run_prolog(req->job_id, req->uid, resv_id, req->spank_job_env, req->spank_job_env_size); @@ -3336,9 +3336,11 @@ _run_prolog(uint32_t jobid, uid_t uid, char *resv_id, char *my_prolog; char **my_env = _build_env(jobid, uid, resv_id, spank_job_env, spank_job_env_size); + /* a long prolog is expected on bluegene systems */ +#ifndef HAVE_BG time_t start_time = time(NULL), diff_time; static uint16_t msg_timeout = 0; - +#endif slurm_mutex_lock(&conf->config_mutex); my_prolog = xstrdup(conf->prolog); slurm_mutex_unlock(&conf->config_mutex); @@ -3349,6 +3351,7 @@ _run_prolog(uint32_t jobid, uid_t uid, char *resv_id, xfree(my_prolog); _destroy_env(my_env); +#ifndef HAVE_BG diff_time = difftime(time(NULL), start_time); if (msg_timeout == 0) msg_timeout = slurm_get_msg_timeout(); @@ -3356,7 +3359,7 @@ _run_prolog(uint32_t jobid, uid_t uid, char *resv_id, error("prolog for job %u ran for %d seconds", jobid, diff_time); } - +#endif return error_code; } diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index a32336089..4b38bbc8a 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 20036 2010-04-15 18:52:13Z da $ + * $Id: mgr.c 20276 2010-05-18 16:51:08Z da $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. 
@@ -910,6 +910,7 @@ job_manager(slurmd_job_t *job) */ if(!conf->job_acct_gather_freq) jobacct_gather_g_stat_task(0); + /* Send job launch response with list of pids */ _send_launch_resp(job, 0); diff --git a/src/sreport/sreport.c b/src/sreport/sreport.c index a7fc6ae3a..7f44d7d2b 100644 --- a/src/sreport/sreport.c +++ b/src/sreport/sreport.c @@ -516,6 +516,30 @@ _process_command (int argc, char *argv[]) argv[0]); } exit_flag = 1; + } else if (strncasecmp (argv[0], "nonparsable", + MAX(command_len, 4)) == 0) { + if (argc > 1) { + exit_code = 1; + fprintf (stderr, "too many arguments for keyword:%s\n", + argv[0]); + } + print_fields_parsable_print = 0; + } else if (strncasecmp (argv[0], "parsable", + MAX(command_len, 8)) == 0) { + if (argc > 1) { + exit_code = 1; + fprintf (stderr, "too many arguments for keyword:%s\n", + argv[0]); + } + print_fields_parsable_print = PRINT_FIELDS_PARSABLE_ENDING; + } else if (strncasecmp (argv[0], "parsable2", + MAX(command_len, 9)) == 0) { + if (argc > 1) { + exit_code = 1; + fprintf (stderr, "too many arguments for keyword:%s\n", + argv[0]); + } + print_fields_parsable_print = PRINT_FIELDS_PARSABLE_NO_ENDING; } else if ((strncasecmp (argv[0], "reservation", MAX(command_len, 2)) == 0) || (strncasecmp (argv[0], "resv", @@ -649,6 +673,7 @@ sreport [<OPTION>] [<COMMAND>] \n\ Valid <COMMAND> values are: \n\ exit Terminate sreport \n\ help Print this description of use. \n\ + nonparsable Return output to normal \n\ parsable Output will be | delimited with an ending '|' \n\ parsable2 Output will be | delimited without an ending '|' \n\ quiet Print no messages other than error messages. \n\ diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 31f76c3bb..712cece8b 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -806,6 +806,7 @@ create_job_step(srun_job_t *job, bool use_all_cpus) (difftime(time(NULL), begin_time) > opt.immediate))) || ((rc != ESLURM_NODES_BUSY) && (rc != ESLURM_PORTS_BUSY) && (rc != ESLURM_PROLOG_RUNNING) && + (rc != SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT) && (rc != ESLURM_DISABLED))) { error ("Unable to create job step: %m"); return -1; diff --git a/src/sstat/print.c b/src/sstat/print.c index 86ca691c6..f07a8ecd1 100644 --- a/src/sstat/print.c +++ b/src/sstat/print.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * print.c - print functions for sacct + * print.c - print functions for sstat * * $Id: print.c 7541 2006-03-18 01:44:58Z da $ ***************************************************************************** @@ -93,7 +93,7 @@ void print_fields(jobacct_step_rec_t *step) switch(field->type) { case PRINT_AVECPU: - tmp_char = _elapsed_time((int)step->sacct.ave_cpu, 0); + tmp_char = _elapsed_time((long)step->sacct.ave_cpu, 0); field->print_routine(field, tmp_char, @@ -206,7 +206,7 @@ void print_fields(jobacct_step_rec_t *step) (curr_inx == field_count)); break; case PRINT_MINCPU: - tmp_char = _elapsed_time((int)step->sacct.min_cpu, 0); + tmp_char = _elapsed_time((long)step->sacct.min_cpu, 0); field->print_routine(field, tmp_char, (curr_inx == field_count)); diff --git a/src/sstat/sstat.c b/src/sstat/sstat.c index b3cc2d49f..3f0fe8037 100644 --- a/src/sstat/sstat.c +++ b/src/sstat/sstat.c @@ -132,7 +132,8 @@ int _sstat_query(slurm_step_layout_t *step_layout, uint32_t job_id, jobacct_msg = (stat_jobacct_msg_t *) ret_data_info->data; if(jobacct_msg) { - debug2("got it back for job %d", + debug2("got it back from %s for job %d", + ret_data_info->node_name, jobacct_msg->job_id); 
jobacct_gather_g_2_sacct( &temp_sacct, @@ -144,14 +145,16 @@ int _sstat_query(slurm_step_layout_t *step_layout, uint32_t job_id, case RESPONSE_SLURM_RC: rc = slurm_get_return_code(ret_data_info->type, ret_data_info->data); - error("there was an error with the request rc = %s", - slurm_strerror(rc)); + error("there was an error with the request to " + "%s rc = %s", + ret_data_info->node_name, slurm_strerror(rc)); break; default: rc = slurm_get_return_code(ret_data_info->type, ret_data_info->data); - error("unknown return given %d rc = %s", - ret_data_info->type, slurm_strerror(rc)); + error("unknown return given from %s: %d rc = %s", + ret_data_info->node_name, ret_data_info->type, + slurm_strerror(rc)); break; } } diff --git a/src/sview/common.c b/src/sview/common.c index 287f60ee1..3fa146a34 100644 --- a/src/sview/common.c +++ b/src/sview/common.c @@ -30,11 +30,35 @@ #include "src/sview/sview.h" #include "src/common/parse_time.h" +static bool menu_right_pressed = false; + typedef struct { GtkTreeModel *model; GtkTreeIter iter; } treedata_t; +/* These next 2 functions are here to make it so we don't magically + * click on something before we really want to in a menu. + */ +static gboolean _menu_button_pressed(GtkWidget *widget, GdkEventButton *event, + gpointer extra) +{ + if(event->button == 3) { + menu_right_pressed = true; + return true; + } + return false; +} + +static gboolean _menu_button_released(GtkWidget *widget, GdkEventButton *event, + gpointer extra) +{ + if(event->button == 3 && !menu_right_pressed) + return true; + menu_right_pressed = false; + return false; +} + static gboolean _entry_changed(GtkWidget *widget, void *msg) { global_entry_changed = 1; @@ -372,8 +396,7 @@ static void _popup_state_changed(GtkCheckMenuItem *menuitem, (display_data->refresh)(NULL, display_data->user_data); } -static void _selected_page(GtkMenuItem *menuitem, - display_data_t *display_data) +static void _selected_page(GtkMenuItem *menuitem, display_data_t *display_data) { treedata_t *treedata = (treedata_t *)display_data->user_data; @@ -510,6 +533,13 @@ extern void make_fields_menu(popup_info_t *popup_win, GtkMenu *menu, if(popup_win && popup_win->spec_info->type == INFO_PAGE) return; + g_signal_connect(G_OBJECT(menu), "button-press-event", + G_CALLBACK(_menu_button_pressed), + NULL); + g_signal_connect(G_OBJECT(menu), "button-release-event", + G_CALLBACK(_menu_button_released), + NULL); + for(i=0; i<count; i++) { while(display_data++) { if(display_data->id == -1) @@ -550,6 +580,14 @@ extern void make_options_menu(GtkTreeView *tree_view, GtkTreePath *path, GtkWidget *menuitem = NULL; treedata_t *treedata = xmalloc(sizeof(treedata_t)); treedata->model = gtk_tree_view_get_model(tree_view); + + g_signal_connect(G_OBJECT(menu), "button-press-event", + G_CALLBACK(_menu_button_pressed), + NULL); + g_signal_connect(G_OBJECT(menu), "button-release-event", + G_CALLBACK(_menu_button_released), + NULL); + if (!gtk_tree_model_get_iter(treedata->model, &treedata->iter, path)) { g_error("make menus error getting iter from model\n"); return; @@ -1556,3 +1594,44 @@ extern void sview_widget_modify_bg(GtkWidget *widget, GtkStateType state, /* g_print("%d 3 took %s\n", grid_button->inx, TIME_STR); */ } + +extern void sview_radio_action_set_current_value(GtkRadioAction *action, + gint current_value) +{ +#ifdef GTK2_USE_RADIO_SET + gtk_radio_action_set_current_value(action, current_value); +#else + GSList *slist, *group; + int i=0; + /* gtk_radio_action_set_current_value wasn't added to + GTK until 2.10, it turns out 
this is what is required to
+	   set the correct value.
+	*/
+	g_return_if_fail(GTK_IS_RADIO_ACTION(action));
+	if((group = gtk_radio_action_get_group(action))) {
+		/* For some reason groups are stored backwards, like a
+		   stack; g_slist_reverse() would fix this but takes twice
+		   as long, so just take the length, subtract one (since
+		   positions are zero-based), and then subtract the value
+		   to get the matching position in the stack.
+		*/
+		current_value = g_slist_length(group) - 1 - current_value;
+		if(current_value < 0) {
+			g_warning("Radio group does not contain an action "
+				  "with value '%d'\n", current_value);
+			return;
+		}
+
+		for (slist = group; slist; slist = slist->next) {
+			if(i == current_value) {
+				gtk_toggle_action_set_active(
+					GTK_TOGGLE_ACTION(slist->data), TRUE);
+				g_object_set(action, "value",
+					     current_value, NULL);
+				return;
+			}
+			i++;
+		}
+	}
+#endif
+}
diff --git a/src/sview/sview.c b/src/sview/sview.c
index 997667d04..1c0158989 100644
--- a/src/sview/sview.c
+++ b/src/sview/sview.c
@@ -344,7 +344,6 @@ static void _reconfigure(GtkToggleAction *action)
 
 static void _get_current_debug(GtkRadioAction *action)
 {
-#ifdef GTK2_USE_RADIO_SET
 	static int debug_level = 0;
 	static slurm_ctl_conf_info_msg_t *slurm_ctl_conf_ptr = NULL;
 	static GtkAction *debug_action = NULL;
@@ -360,15 +359,8 @@ static void _get_current_debug(GtkRadioAction *action)
 	   changed so we need to make it happen here */
 	if(debug_level == 0)
 		debug_inited = 1;
-	gtk_radio_action_set_current_value(GTK_RADIO_ACTION(debug_action),
-					   debug_level);
-#else
-	/* Since gtk_radio_action_set_current_value wasn't added to
-	   GTK until 2.10 we have to just not set the first one here
-	   correctly unless we are running with it.
-	*/
-	debug_inited = 1;
-#endif
+	sview_radio_action_set_current_value(GTK_RADIO_ACTION(debug_action),
+					     debug_level);
 }
 
 static void _set_debug(GtkRadioAction *action,
diff --git a/src/sview/sview.h b/src/sview/sview.h
index eb0b34208..b868dbd35 100644
--- a/src/sview/sview.h
+++ b/src/sview/sview.h
@@ -505,4 +505,6 @@ extern void add_display_treestore_line_with_font(
 				char *font);
 extern void sview_widget_modify_bg(GtkWidget *widget, GtkStateType state,
 				   const GdkColor color);
+extern void sview_radio_action_set_current_value(GtkRadioAction *action,
+						 gint current_value);
 #endif
diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am
index 044904a01..d42c29a18 100644
--- a/testsuite/expect/Makefile.am
+++ b/testsuite/expect/Makefile.am
@@ -325,6 +325,7 @@ EXTRA_DIST = \
 	test21.25 \
 	test21.26 \
 	test21.27 \
+	test21.28 \
 	test22.1 \
 	test22.2 \
 	test23.1 \
diff --git a/testsuite/expect/README b/testsuite/expect/README
index dce472d94..6cdc2c56d 100644
--- a/testsuite/expect/README
+++ b/testsuite/expect/README
@@ -530,6 +530,8 @@ test21.23 sacctmgr wckey test
 test21.24 sacctmgr dump file
 test21.25 sacctmgr show config
 test21.26 sacctmgr +=, -= modify QoS test
+test21.27 sacctmgr problems test
+test21.28 sacctmgr abort delete
 
 test22.# Testing of sreport commands and options.
 	 These also test the sacctmgr archive dump/load functions.
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index a0c3e2e7c..8fe0b067c 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -641,6 +641,37 @@ proc test_assoc_enforced { } {
 	return $assoc_enforced
 }
 
+################################################################
+#
+# Proc: test_limits_enforced
+#
+# Purpose: Determine if resource limits are enforced when running a
+#          job.  This is based upon the value of
+#          AccountingStorageEnforce in the slurm.conf.
+# +# Returns level of association enforcement, 0 if none +# +################################################################ +proc test_limits_enforced { } { + global scontrol number + + log_user 0 + set enforced 0 + spawn $scontrol show config + expect { + -re "AccountingStorageEnforce *= (\[a-z]+),limits" { + set enforced 1 + exp_continue + } + eof { + wait + } + } + + log_user 1 + return $enforced +} + ################################################################ # # Proc: test_power_save diff --git a/testsuite/expect/test21.21 b/testsuite/expect/test21.21 index 2d1d41f52..a309b4ea7 100755 --- a/testsuite/expect/test21.21 +++ b/testsuite/expect/test21.21 @@ -2,7 +2,7 @@ ############################################################################ # Purpose: Test of SLURM functionality # sacctmgr add an account to this cluster and try using it with -# salloc, sbatch and srun. +# salloc, sbatch and srun. We also test limits here as well. # # Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR # "FAILURE: ..." otherwise with an explanation of the failure, OR @@ -32,6 +32,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ############################################################################ source ./globals +source ./globals_accounting set test_id "21.21" set exit_code 0 @@ -40,6 +41,237 @@ set test_acct "test_acct" set timeout 60 print_header $test_id +proc _test_limits { } { + global test_acct file_in srun sbatch squeue scancel bin_id number bin_sleep bin_rm + # test maxcpumin maxcpu maxjob maxnode maxsubmit maxwall + set maxcpumin 10 + set maxcpu 4 + set maxjob 2 + set maxnode 1 + set maxsubmit 4 + set maxwall 10 + set exit_code 0 + + incr exit_code [mod_acct "" "" "$test_acct" "" "" "" "" "" "" "" "" "" "" "" "" $maxjob $maxnode $maxsubmit $maxwall "" "" ""] + if { $exit_code } { + return $exit_code + } + # + # Spawn a job testing maxnode + # + set matches 0 + spawn $srun -v -N$maxnode --account=$test_acct $bin_id + expect { + -re "launching ($number)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if { $matches != 1 } { + send_user "\nFAILURE: job dion't launch with correct limit\n" + set exit_code 1 + return $exit_code + } + # + # Spawn a job testing maxnode+1 + # + spawn $srun -N[expr $maxnode + 1] --account=$test_acct $bin_id + expect { + -re "Job violates accounting policy" { + send_user "\nThis error is expected, not a problem\n" + exp_continue + } + -re "launching ($number)" { + send_user "\nFAILURE: job should not have run.\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if { $exit_code } { + return $exit_code + } + + # + # Spawn a job testing maxwall + # + set matches 0 + spawn $srun -v -t$maxwall --account=$test_acct $bin_id + expect { + -re "launching ($number)" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if { $matches != 1 } { + send_user "\nFAILURE: job dion't launch with correct limit\n" + set exit_code 1 + return $exit_code + } + # + # Spawn a job testing maxwall+1 + # + spawn $srun -t[expr $maxwall + 1] --account=$test_acct $bin_id + expect { + -re "Job violates accounting policy" { + send_user "\nThis error is expected, not a problem\n" + exp_continue + } + -re "launching ($number)" { + send_user "\nFAILURE: job should not have run.\n" 
+ set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: srun not responding\n" + set exit_code 1 + } + eof { + wait + } + } + + if { $exit_code } { + return $exit_code + } + + # This should overwrite the old file_in which has already been used, + # so no big deal. + make_bash_script $file_in " + $bin_sleep 5 + " + + # test job max cnt and submit + for {set inx 0} {$inx < $maxsubmit} {incr inx} { + set job_id($inx) 0 + set mypid [spawn $sbatch -N1 -n1 --account=$test_acct --output=/dev/null --error=/dev/null -t5 $file_in] + expect { + -re "Submitted batch job ($number)" { + set job_id($inx) $expect_out(1,string) + exp_continue + } + -re "Unable to contact" { + send_user "\nFAILURE: slurm appears to be down\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + slow_kill $mypid + set exit_code 1 + } + eof { + wait + } + } + + if { !$job_id($inx) } { + send_user "\nFAILURE: sbatch didn't return jobid\n" + set exit_code 1 + exit + } + + if { $exit_code } { + break + } + } + + if { $exit_code } { + return $exit_code + } + + # then submit one more over the limit and it should fail + set mypid [spawn $sbatch -N1 -n1 --account=$test_acct --output=/dev/null --error=/dev/null -t5 $file_in] + expect { + -re "Job violates accounting policy" { + send_user "\nThis error is expected, not a problem\n" + exp_continue + } + -re "Submitted batch job ($number)" { + send_user "\nFAILURE: job should not have run.\n" + set exit_code 1 + exp_continue + } + -re "Unable to contact" { + send_user "\nFAILURE: slurm appears to be down\n" + set exit_code 1 + exp_continue + } + timeout { + send_user "\nFAILURE: sbatch not responding\n" + slow_kill $mypid + set exit_code 1 + } + eof { + wait + } + } + + if { $exit_code } { + return $exit_code + } + + set matches 0 + set mypid [spawn $squeue -o "\%i \%t \%r"] + expect { + -re "($job_id(2)|$job_id(3)).PD.AssociationJobLimit" { + incr matches + exp_continue + } + -re "($job_id(0)|$job_id(1)).R.None" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: squeue not responding\n" + slow_kill $mypid + set exit_code 1 + } + eof { + wait + } + } + + spawn $scancel --quiet --account=$test_acct + expect { + eof { + wait + } + } + + + if { $matches != 4 } { + send_user "\nFAILURE: job's are in the state expected got $matches\n" + set exit_code 1 + return $exit_code + } + + return $exit_code +} + # # Check accounting config and bail if not found. # @@ -249,6 +481,15 @@ if {$job_id == 0} { } } + +# +# Check to see if limits are enforced. 
+# +if { [test_limits_enforced] == 1 } { + set exit_code [_test_limits] +} + + # # Use sacctmgr to delete the test account # diff --git a/testsuite/expect/test21.27 b/testsuite/expect/test21.27 index 81fa81a04..38d87ef73 100755 --- a/testsuite/expect/test21.27 +++ b/testsuite/expect/test21.27 @@ -66,7 +66,7 @@ if { [test_account_storage] == 0 } { } if { [string compare [check_accounting_admin_level] "Administrator"] } { - send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n" + send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME set admin=admin.\n" exit 0 } diff --git a/testsuite/expect/test21.28 b/testsuite/expect/test21.28 new file mode 100755 index 000000000..4f777f204 --- /dev/null +++ b/testsuite/expect/test21.28 @@ -0,0 +1,366 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test of SLURM functionality +# sacctmgr delete N (abort) test +# +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2008 Lawrence Livermore National Security. +# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). +# Written by Joseph Donaghy <donaghy1@llnl.gov> +# CODE-OCEC-09-009. All rights reserved. +# +# This file is part of SLURM, a resource management program. +# For details, see <https://computing.llnl.gov/linux/slurm/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+############################################################################ +source ./globals_accounting + +set test_id "21.28" +set exit_code 0 +set tc1 tcluster1 +set tc2 tcluster2 +set tc3 tcluster3 +set fs1 2500 +set fs2 1700 +set fs3 1 +set mc1 1000000 +set mc2 700000 +set mc3 1 +set mj1 50 +set mj2 70 +set mj3 1 +set mn1 300 +set mn2 700 +set mn3 1 +set mw1 01:00:00 +set mw2 00:07:00 +set mw3 00:01:00 +set clu cluster +set cl1 1tmach +set cl2 2tmach +set cl3 3tmach +set acc account +set acc account +set nams names +set nm1 testaccta1 +set nm2 testaccta2 +set nm3 testaccta3 +set des Description +set ds1 testaccounta1 +set ds2 testacct +set org Organization +set or1 accountorga1 +set or2 acttrg +set qs QosLevel +set qs1 normal +set par parent +set usr user +set us1 tuser1 +set us2 tuser2 +set us3 tuser3 +set al AdminLevel +set aln None +set ala Administrator +set alo Operator +set dac DefaultAccount +set pts Partitions +set fs fairshare +set mc maxcpu +set mj maxjob +set mn maxnode +set mw maxwall +set dbu debug +set access_err 0 +set lista "$nm1 $nm2 $nm3" +set listc "$tc1 $tc2 $tc3" +set listu "$us1 $us2 $us3" +#set user_name "id -u -n" + +print_header $test_id + +set timeout 60 + +# +# Check accounting config and bail if not found. +# +if { [test_account_storage] == 0 } { + send_user "\nWARNING: This test can't be run without a usable AccountStorageType\n" + exit 0 +} + +if { [string compare [check_accounting_admin_level] "Administrator"] } { + send_user "\nWARNING: This test can't be run without being an Accounting administrator.\nUse sacctmgr mod user \$USER_NAME admin=admin.\n" + exit 0 +} + +#make sure we have a clean system and permission to do this work +remove_user "" "" "$us1,$us2,$us3" +remove_acct "" "$nm1,$nm2,$nm3" +remove_cluster "$tc1,$tc2,$tc3" +if {$access_err != 0} { + send_user "\nWARNING: not authorized to perform this test\n" + exit $exit_code +} + +#add cluster +incr exit_code [add_cluster "$tc1,$tc2,$tc3" "" "" "" "" "" "" "" "" "" "" "" ""] +if { $exit_code } { + remove_user "" "" "$us1,$us2,$us3" + remove_acct "" "$nm1,$nm2,$nm3" + remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add accounts +incr exit_code [add_acct "$tc1,$tc2,$tc3" "" "$nm1,$nm2,$nm3" "" "" "" "" "" "" "" "" "" "" "" "" "" "" "" ""] +if { $exit_code } { + remove_user "" "" "$us1,$us2,$us3" + remove_acct "" "$nm1,$nm2,$nm3" + remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +#add users +# account adminlevel cluster defaultaccount fs maxcpu maxjob maxnodes maxwall name +incr exit_code [add_user "$tc1,$tc2,$tc3" "$nm1,$nm2,$nm3" $us1,$us2,$us3 "" "$alo" "$nm2" "" "" "" "$fs1" "" "" "" "" "" "" "$mc1" "" "$mj1" "$mn1" "" "$mw1" ] +if { $exit_code } { + remove_user "" "" "$us1,$us2,$us3" + remove_acct "" "$nm1,$nm2,$nm3" + remove_cluster "$tc1,$tc2,$tc3" + exit $exit_code +} + +################################################################ +# +# Proc: list_test_users +# +# Purpose: Use sacctmgr to list the test user additions +# +# Returns: Number representing the number of entries found. 
+#
+# Input: None
+#
+################################################################
+proc list_test_users { } {
+	global sacctmgr
+	global us1 us2 us3 nm1 nm2 nm3 tc1 tc2 tc3 fs1 mc1 mj1 mn1 mw1 alo
+	set matches 0
+	set my_pid [spawn $sacctmgr -n -p list user format="User,DefaultA,Admin,clus,acc,fair,maxcpum,maxj,maxn,maxw" names=$us1,$us2,$us3 withassoc]
+	expect {
+		-re "There was a problem" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+		}
+		-re "(($us1|$us2|$us3).$nm2.$alo.($tc1|$tc2|$tc3).($nm1|$nm2|$nm3).$fs1.$mc1.$mj1.$mn1.$mw1.)" {
+			incr matches
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr list associations not responding\n"
+			slow_kill $my_pid
+		}
+		eof {
+			wait
+		}
+	}
+
+	return $matches
+}
+################################################################
+################################################################
+#
+# Proc: list_test_object
+#
+# Purpose: Use sacctmgr to list the test object deletions
+#
+# Returns: Number representing the number of entries found.
+#
+# Input: None
+#
+################################################################
+proc list_test_object { to1 to2 to3 tname} {
+	global sacctmgr
+	set tmatches 0
+	set my_pid [spawn $sacctmgr -n -p list $tname $to1,$to2,$to3]
+	expect {
+		-re "There was a problem" {
+			send_user "FAILURE: there was a problem with the sacctmgr command\n"
+		}
+		-re "($to1|$to2|$to3)" {
+			incr tmatches
+			exp_continue
+		}
+		timeout {
+			send_user "\nFAILURE: sacctmgr list associations not responding\n"
+			slow_kill $my_pid
+		}
+		eof {
+			wait
+		}
+	}
+
+	return $tmatches
+}
+################################################################
+#
+# Begin here with abort delete test
+#
+foreach atest $lista {
+set my_pid [spawn $sacctmgr del $acc $atest]
+	expect {
+		"(N/y):" {
+			send "N\r"
+			exp_continue
+			timeout {
+				send_user "\nFAILURE: sacctmgr list $acc timing out.\n"
+				slow_kill $my_pid
+			}
+		}
+	}
+}
+
+
+if { [list_test_users] != 27 } {
+	send_user "\nFAILURE: This test$test_id concludes that sacctmgr did not abort delete of accounts.\n"
+	exit 1
+	}
+
+#
+foreach ctest $listc {
+set my_pid [spawn $sacctmgr del $clu $ctest]
+	expect {
+		"(N/y):" {
+			send "N\r"
+			exp_continue
+			timeout {
+				send_user "\nFAILURE: sacctmgr list cluster timing out.\n"
+				slow_kill $my_pid
+			}
+		}
+	}
+}
+
+
+if { [list_test_users] != 27 } {
+	send_user "\nFAILURE: This test$test_id concludes that sacctmgr did not abort delete of $clu\n"
+	exit 1
+	}
+
+#
+foreach utest $listu {
+set my_pid [spawn $sacctmgr del $usr $utest]
+	expect {
+		"(N/y):" {
+			send "N\r"
+			exp_continue
+			timeout {
+				send_user "\nFAILURE: sacctmgr list $usr timing out.\n"
+				slow_kill $my_pid
+			}
+		}
+	}
+}
+
+
+if { [list_test_users] != 27 } {
+	send_user "\nFAILURE: This test$test_id concludes that sacctmgr did not abort delete of users.\n"
+	exit 1
+	}
+
+################################################################
+#
+# Begin here with delete test
+#
+#
+foreach udtest $listu {
+set my_pid [spawn $sacctmgr del $usr $udtest]
+	expect {
+		"(N/y):" {
+			send "y\r"
+			exp_continue
+			timeout {
+				send_user "\nFAILURE: sacctmgr list $usr timing out.\n"
+				slow_kill $my_pid
+			}
+		}
+	}
+}
+
+if { [list_test_object $us1 $us2 $us3 user] != 0 } {
+	send_user "\nFAILURE: This test$test_id concludes that sacctmgr did not delete users.\n"
+	exit 1
+	}
+
+#
+
+foreach adtest $lista {
+set my_pid [spawn $sacctmgr del $acc $adtest]
+	expect {
+		"(N/y):" {
+			send "y\r"
+			exp_continue
+			timeout {
+				send_user "\nFAILURE: sacctmgr list $acc 
+				timing out.\n"
+				slow_kill $my_pid
+			}
+		}
+	}
+}
+
+
+if { [list_test_object $nm1 $nm2 $nm3 account] != 0 } {
+	send_user "\nFAILURE: This test$test_id concludes that sacctmgr did not delete accounts.\n"
+	exit 1
+	}
+
+#
+foreach cdtest $listc {
+set my_pid [spawn $sacctmgr del $clu $cdtest]
+	expect {
+		"(N/y):" {
+			send "y\r"
+			exp_continue
+			timeout {
+				send_user "\nFAILURE: sacctmgr list cluster timing out.\n"
+				slow_kill $my_pid
+			}
+		}
+	}
+}
+
+
+if { [list_test_object $tc1 $tc2 $tc3 cluster] != 0 } {
+	send_user "\nFAILURE: This test$test_id concludes that sacctmgr did not delete $clu\n"
+	exit 1
+	}
+
+# This is the end below here
+
+incr exit_code [remove_user "" "" "$us1,$us2,$us3"]
+incr exit_code [remove_acct "" "$nm1,$nm2,$nm3"]
+incr exit_code [remove_cluster "$tc1,$tc2,$tc3"]
+
+if {$exit_code == 0} {
+	send_user "\nSUCCESS\n"
+} else {
+	send_user "\nFAILURE\n"
+}
+exit $exit_code
+
--
GitLab
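
Usage illustration (not part of the upstream patch): the sreport.c hunk near the top of this patch adds interactive "parsable", "parsable2" and "nonparsable" keywords. The short expect sketch below shows how that toggle could be exercised in the style of the testsuite scripts above. It is a minimal sketch under stated assumptions: sreport is in PATH, accounting data exists, the interactive prompt is "sreport:", and "cluster utilization" is just an example report.

#!/usr/bin/expect
# Hypothetical sketch: verify that sreport honors parsable2/nonparsable
# when entered interactively (assumes a working accounting setup).
set timeout 30
spawn sreport
expect "sreport:"
# switch to pipe-delimited output without a trailing '|'
send "parsable2\r"
expect "sreport:"
send "cluster utilization\r"
expect {
	-re {\|}  { send_user "\npipe-delimited output seen\n" }
	timeout   { send_user "\nno parsable output before timeout\n" }
}
expect "sreport:"
# restore the default formatting, then leave
send "nonparsable\r"
expect "sreport:"
send "quit\r"
expect eof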