diff --git a/META b/META index 7b2a9a9df34139df7a2cc8ccfdd2d394aae01a5f..b9f60e14189d5b6a874166c039606c3e7cd303b2 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 2 Meta: 1 - Micro: 6 + Micro: 7 Minor: 1 Name: slurm Release: 1 Release_tags: dist - Version: 2.1.6 + Version: 2.1.7 diff --git a/NEWS b/NEWS index 0990fda7c72a04ba6d1a1876aee43fa3161ed06a..85f38030421d7f17e888909d5aa312bce6ced267 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,49 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 2.1.8 +======================== + +* Changes in SLURM 2.1.7 +======================== + -- Modify srun, salloc and sbatch parsing for the --signal option to accept + either a signal name in addition to the previously supported signal + numbers (e.g. "--signal=USR2@200"). + -- BLUEGENE - Fixed sinfo --long --Node output for cpus on a single cnode. + -- In sched/wiki2 - Fix another logic bug in support of Moab being able to + identify preemptable jobs. + -- In sched/wiki2 - For BlueGene systems only: Fix bug preventing Moab from + being able to correctly change the node count of pending jobs. + -- In select/cons_res - Fix bug preventing job preemption with a configuration + of Shared=FORCE:1 and PreemptMode=GANG,SUSPEND. + -- In the TaskProlog, add support for an "unset" option to clear environment + variables for the user application. Also add support for embedded white- + space in the environment variables exported to the user application + (everything after the equal sign to the end of the line is included without + alteration). + -- Do not install /etc/init.d/slurm or /etc/init.d/slurmdbd on AIX systems. + -- BLUEGENE - fixed check for small blocks if a node card of a midplane is + in an error state other jobs can still run on the midplane on other + nodecards. 
+ -- BLUEGENE - Check to make sure job killing is in the active job table in + DB2 when killing the job. + -- Correct logic to support ResvOverRun configuration parameter. + -- Get --acctg-freq option working for srun and salloc commands. + -- Fix sinfo display of drained nodes correctly with the summarize flag. + -- Fix minor memory leaks in slurmd and slurmstepd. + -- Better error messages for failed step launch. + -- Modify srun to ensure compatibility of the --relative option with the node + count requested. + +* Changes in SLURM 2.1.6-2 +========================== + -- In sched/wiki2 - Fix logic in support of Moab being able to identify + preemptable jobs. + -- Applied fixes to a debug4 message in priority_multifactor.c sent in by + Per Lundqvist + -- BLUEGENE - Fixed issue where incorrect nodecards could be picked when + looking at combining small blocks to make a larger small block. + * Changes in SLURM 2.1.6 ======================== -- For newly submitted jobs, report expected start time in squeue --start as @@ -4957,4 +5000,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. 
-$Id: NEWS 19896 2010-03-26 21:49:42Z jette $ +$Id: NEWS 20049 2010-04-16 00:34:17Z da $ diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index 4c01186639e25e6c4b3be2b40f6969a5e2bbaf05..f4952fcb55819f6f01883d71172c85bc08b5c018 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -559,7 +559,7 @@ sacctmgr add account chemistry,physics parent=science \ For example, to permit user <i>da</i> to execute jobs on all clusters with a default account of <i>test</i> execute:</p> <pre> -sacctmgr add user da default=test +sacctmgr add user da DefaultAccount=test </pre> <p>If <b>AccountingStorageEnforce=associations</b> is configured in diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index af07a8434f7243cf74699f0b02afa5b8bbf66c02..f0dcb8c9744b9074e4e1436e330da2e1110d5777 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -135,7 +135,7 @@ For example: \fB\-\-constraint="opteron&video"\fR or In the first example, only nodes having both the feature "opteron" AND the feature "video" will be used. There is no mechanism to specify that you want one node with feature -"opteron" and another node with feature "video" in that case that no +"opteron" and another node with feature "video" in case no node has both features. If only one of a set of possible options should be used for all allocated nodes, then use the OR operator and enclose the options within square brackets. @@ -757,8 +757,8 @@ When a job is within \fIsig_time\fR seconds of its end time, send it the signal \fIsig_num\fR. Due to the resolution of event handling by SLURM, the signal may be sent up to 60 seconds earlier than specified. -Both \fIsig_time\fR and \fIsig_num\fR must have integer values -between zero and 65535. +\fIsig_num\fR may either be a signal number or name (e.g. "10" or "USR1"). +\fIsig_time\fR must have integer value between zero and 65535. By default, no signal is sent before the job's end time. 
If a \fIsig_num\fR is specified without any \fIsig_time\fR, the default time will be 60 seconds. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index e5fe97639a4d5e92662ec44952687bca661b2d06..65925e84386c1bbbba208532d19c51544988696f 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -142,7 +142,7 @@ For example: \fB\-\-constraint="opteron&video"\fR or In the first example, only nodes having both the feature "opteron" AND the feature "video" will be used. There is no mechanism to specify that you want one node with feature -"opteron" and another node with feature "video" in that case that no +"opteron" and another node with feature "video" in case no node has both features. If only one of a set of possible options should be used for all allocated nodes, then use the OR operator and enclose the options within square brackets. @@ -841,8 +841,8 @@ When a job is within \fIsig_time\fR seconds of its end time, send it the signal \fIsig_num\fR. Due to the resolution of event handling by SLURM, the signal may be sent up to 60 seconds earlier than specified. -Both \fIsig_time\fR and \fIsig_num\fR must have integer values -between zero and 65535. +\fIsig_num\fR may either be a signal number or name (e.g. "10" or "USR1"). +\fIsig_time\fR must have integer value between zero and 65535. By default, no signal is sent before the job's end time. If a \fIsig_num\fR is specified without any \fIsig_time\fR, the default time will be 60 seconds. diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index e29f6a70e29f97687ebf3a486989ce6cef6cb2c6..626466e3ffa2bcfee27e74ef427a6e98ff3336db 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -381,10 +381,27 @@ The second number of the signal that caused the process to terminate if it was terminated by a signal. .TP \fIFeatures\fP=<features> -Set the job's required node features. 
Multiple values -may be comma separated if all features are required (AND operation) or -separated by "|" if any of the specified features are required (OR operation). -Value may be cleared with blank data value, "Features=". +Set the job's required node features. +The list of features may include multiple feature names separated +by ampersand (AND) and/or vertical bar (OR) operators. +For example: \fBFeatures="opteron&video"\fR or \fBFeatures="fast|faster"\fR. +In the first example, only nodes having both the feature "opteron" AND +the feature "video" will be used. +There is no mechanism to specify that you want one node with feature +"opteron" and another node with feature "video" in case no +node has both features. +If only one of a set of possible options should be used for all allocated +nodes, then use the OR operator and enclose the options within square brackets. +For example: "\fBFeatures=[rack1|rack2|rack3|rack4]"\fR might +be used to specify that all nodes must be allocated on a single rack of +the cluster, but any of those four racks can be used. +A request can also specify the number of nodes needed with some feature +by appending an asterisk and count after the feature name. +For example "\fBFeatures=graphics*4"\fR +indicates that at least four allocated nodes must have the feature "graphics." +Constraints with node counts may only be combined with AND operators. +Value may be cleared with blank data value, for example "Features=". + .TP \fIGeometry\fP=<geo> Reset the required job geometry. 
diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index 7cf092586e8f3e19621ef1fd5b8b00ac106d70f9..24578808a5bd302136ca5a195942020692ae5569 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -82,7 +82,7 @@ when running with various options are "%9P %5a %.10l %.5D %6t %N" .TP .I "\-\-summarize" -"%9P %5a %.10l %15F %N" +"%9P %5a %.10l %16F %N" .TP .I "\-\-long" "%9P %5a %.10l %.8s %4r %5h %10g %.5D %11T %N" diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 3e58d440d82f93e5a8debc757b4cca2777fbe682..364c54043f23431b3f6532ee7c00a144b048e85a 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -131,7 +131,7 @@ For example: \fB\-\-constraint="opteron&video"\fR or In the first example, only nodes having both the feature "opteron" AND the feature "video" will be used. There is no mechanism to specify that you want one node with feature -"opteron" and another node with feature "video" in that case that no +"opteron" and another node with feature "video" in case no node has both features. If only one of a set of possible options should be used for all allocated nodes, then use the OR operator and enclose the options within square brackets. @@ -953,8 +953,8 @@ When a job is within \fIsig_time\fR seconds of its end time, send it the signal \fIsig_num\fR. Due to the resolution of event handling by SLURM, the signal may be sent up to 60 seconds earlier than specified. -Both \fIsig_time\fR and \fIsig_num\fR must have integer values -between zero and 65535. +\fIsig_num\fR may either be a signal number or name (e.g. "10" or "USR1"). +\fIsig_time\fR must have integer value between zero and 65535. By default, no signal is sent before the job's end time. If a \fIsig_num\fR is specified without any \fIsig_time\fR, the default time will be 60 seconds. 
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index a5f683cdb8a7a3d3e439aac851663f7ef9196d75..e6f25ed1fecaf0bbcb8447e35db2f57399df5dd3 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1685,32 +1685,48 @@ Fully qualified pathname of a program to be execute as the slurm job's owner prior to initiation of each task. Besides the normal environment variables, this has SLURM_TASK_PID available to identify the process ID of the task being started. -Standard output from this program of the form -"export NAME=value" will be used to set environment variables -for the task being spawned. -Standard output from this program of the form -"print ..." will cause that line (without the leading "print ") +Standard output from this program can be used to control the environment +variables and output for the user program. +.RS +.TP 20 +\fBexport NAME=value\fR +Will set environment variables for the task being spawned. +Everything after the equal sign to the end of the +line will be used as the value for the environment variable. +Exporting of functions is not currently supported. +.TP +\fBprint ...\fR +Will cause that line (without the leading "print ") to be printed to the job's standard output. +.TP +\fBunset NAME\fR +Will clear environment variables for the task being spawned. +.TP The order of task prolog/epilog execution is as follows: -.RS .TP -\fB1. pre_launch()\fR: function in TaskPlugin +\fB1. pre_launch()\fR +Function in TaskPlugin .TP -\fB2. TaskProlog\fR: system\-wide per task program defined in slurm.conf +\fB2. TaskProlog\fR +System\-wide per task program defined in slurm.conf .TP -\fB3. user prolog\fR: job step specific task program defined using +\fB3. user prolog\fR +Job step specific task program defined using \fBsrun\fR's \fB\-\-task\-prolog\fR option or \fBSLURM_TASK_PROLOG\fR environment variable .TP \fB4.\fR Execute the job step's task .TP -\fB5. user epilog\fR: job step specific task program defined using +\fB5. 
user epilog\fR +Job step specific task program defined using \fBsrun\fR's \fB\-\-task\-epilog\fR option or \fBSLURM_TASK_EPILOG\fR environment variable .TP -\fB6. TaskEpilog\fR: system\-wide per task program defined in slurm.conf +\fB6. TaskEpilog\fR +System\-wide per task program defined in slurm.conf .TP -\fB7. post_term()\fR: function in TaskPlugin +\fB7. post_term()\fR +Function in TaskPlugin .RE .TP diff --git a/etc/init.d.slurm b/etc/init.d.slurm index 90cc7a1f2cea62e04949608baceb51ec5885f550..d7ab6af2085521e8bc655f87b6a9b860b4aed0f2 100644 --- a/etc/init.d.slurm +++ b/etc/init.d.slurm @@ -38,7 +38,10 @@ if [ -f /etc/rc.status ]; then rc_reset else - [ -f /etc/rc.d/init.d/functions ] || exit 0 + if [ ! -f /etc/rc.d/init.d/functions ]; then + echo "Could not find /etc/rc.d/init.d/functions. Is some other daemon launch mechanism used?" + exit 1 + fi . /etc/rc.d/init.d/functions SUSE=0 STARTPROC=daemon @@ -52,6 +55,11 @@ else RETVAL=0 fi +if [ ! -x $BINDIR/scontrol ]; then + echo "Could not find $BINDIR/scontrol. Bad path?" + exit 1 +fi + # We can not use a starter program without losing environment # variables that are critical on Blue Gene systems if [ -d /bgl/BlueLight/ppcfloor ]; then @@ -66,7 +74,10 @@ else SLURMD_OPTIONS="" fi -[ -f $CONFDIR/slurm.conf ] || exit 1 +if [ ! -f $CONFDIR/slurm.conf ]; then + echo "Could not find $CONFDIR/slurm.conf. Bad path?" + exit 1 +fi # setup library paths for slurm and munge support export LD_LIBRARY_PATH="$LIBDIR:$LD_LIBRARY_PATH" diff --git a/etc/init.d.slurmdbd b/etc/init.d.slurmdbd index e8df33d0b3eb3ff1456c5cd87ffa772e372ababc..18d75f54f964fcf9ff97a0119b02af65da1bd831 100755 --- a/etc/init.d.slurmdbd +++ b/etc/init.d.slurmdbd @@ -33,7 +33,10 @@ if [ -f /etc/rc.status ]; then rc_reset else - [ -f /etc/rc.d/init.d/functions ] || exit 0 + if [ ! -f /etc/rc.d/init.d/functions ]; then + echo "Could not find /etc/rc.d/init.d/functions. Is some other daemon launch mechanism used?" + exit 1 + fi . 
/etc/rc.d/init.d/functions SUSE=0 STARTPROC=daemon @@ -60,7 +63,10 @@ else SLURMDBD_OPTIONS="" fi -[ -f $CONFDIR/slurmdbd.conf ] || exit 1 +if [ ! -f $CONFDIR/slurmdbd.conf ]; then + echo "Could not find $CONFDIR/slurmdbd.conf. Bad path?" + exit 1 +fi # setup library paths for slurm and munge support export LD_LIBRARY_PATH="$LIBDIR:$LD_LIBRARY_PATH" diff --git a/slurm.spec b/slurm.spec index 2e4a03b821c3b125c56dfdbb5b9ac7bfba55d6de..abcaec43da0bdf2cb4450e058fb33f5df3415471 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,4 +1,4 @@ -# $Id: slurm.spec 19555 2010-02-19 22:35:07Z da $ +# $Id: slurm.spec 19976 2010-04-07 15:55:58Z jette $ # # Note that this package is not relocatable @@ -83,14 +83,14 @@ %endif Name: slurm -Version: 2.1.6 +Version: 2.1.7 Release: 1%{?dist} Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-2.1.6.tar.bz2 +Source: slurm-2.1.7.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ @@ -352,7 +352,7 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart ############################################################################# %prep -%setup -n slurm-2.1.6 +%setup -n slurm-2.1.7 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ @@ -382,13 +382,13 @@ DESTDIR="$RPM_BUILD_ROOT" make install DESTDIR="$RPM_BUILD_ROOT" make install-contrib %ifos aix5.3 -mv ${RPM_BUILD_ROOT}%{_bindir}/srun ${RPM_BUILD_ROOT}%{_sbindir} + mv ${RPM_BUILD_ROOT}%{_bindir}/srun ${RPM_BUILD_ROOT}%{_sbindir} +%else + if [ -d /etc/init.d ]; then + install -D -m755 etc/init.d.slurm $RPM_BUILD_ROOT/etc/init.d/slurm + install -D -m755 etc/init.d.slurmdbd $RPM_BUILD_ROOT/etc/init.d/slurmdbd + fi %endif - -if [ -d /etc/init.d ]; then - install -D -m755 etc/init.d.slurm $RPM_BUILD_ROOT/etc/init.d/slurm - install -D -m755 etc/init.d.slurmdbd $RPM_BUILD_ROOT/etc/init.d/slurmdbd -fi install -D -m644 etc/slurm.conf.example 
${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.conf.example install -D -m644 etc/slurmdbd.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/slurmdbd.conf.example install -D -m755 etc/slurm.epilog.clean ${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.epilog.clean @@ -421,9 +421,8 @@ rm -f ${RPM_BUILD_ROOT}%{_mandir}/man1/srun_cr* ${RPM_BUILD_ROOT}%{_bindir}/srun # Build conditional file list for main package LIST=./slurm.files touch $LIST -if [ -d /etc/init.d ]; then - echo "/etc/init.d/slurm" >> $LIST -fi +test -f $RPM_BUILD_ROOT/etc/init.d/slurm && + echo /etc/init.d/slurm >> $LIST %if %{slurm_with aix} install -D -m644 etc/federation.conf.example ${RPM_BUILD_ROOT}%{_sysconfdir}/federation.conf.example @@ -446,9 +445,8 @@ test -f $RPM_BUILD_ROOT/%{_libdir}/slurm/switch_federation.so && LIST=./slurmdbd.files touch $LIST -if [ -d /etc/init.d ]; then - echo "/etc/init.d/slurmdbd" >> $LIST -fi +test -f $RPM_BUILD_ROOT/etc/init.d/slurm && + echo /etc/init.d/slurmdbd >> $LIST LIST=./plugins.files touch $LIST diff --git a/src/common/proc_args.c b/src/common/proc_args.c index 55781b9a49857320b7ec2565712b4beee3942719..d68811a5c04b39826a43e7cecaa9e2750c0895f4 100644 --- a/src/common/proc_args.c +++ b/src/common/proc_args.c @@ -751,17 +751,20 @@ int get_signal_opts(char *optarg, uint16_t *warn_signal, uint16_t *warn_time) if (optarg == NULL) return -1; - num = strtol(optarg, &endptr, 10); - if ((num < 0) || (num > 0x0ffff)) + endptr = strchr(optarg, '@'); + if (endptr) + endptr[0] = '\0'; + num = (uint16_t) sig_name2num(optarg); + if (endptr) + endptr[0] = '@'; + if ((num < 1) || (num > 0x0ffff)) return -1; *warn_signal = (uint16_t) num; - if (endptr[0] == '\0') { + if (!endptr) { *warn_time = 60; return 0; } - if (endptr[0] != '@') - return -1; num = strtol(endptr+1, &endptr, 10); if ((num < 0) || (num > 0x0ffff)) @@ -771,3 +774,46 @@ int get_signal_opts(char *optarg, uint16_t *warn_signal, uint16_t *warn_time) return 0; return -1; } + +/* Convert a signal name to it's numeric equivalent. 
+ * Return 0 on failure */ +int sig_name2num(char *signal_name) +{ + char *sig_name[] = {"HUP", "INT", "QUIT", "KILL", "TERM", + "USR1", "USR2", "CONT", NULL}; + int sig_num[] = {SIGHUP, SIGINT, SIGQUIT, SIGKILL, SIGTERM, + SIGUSR1, SIGUSR2, SIGCONT}; + char *ptr; + long tmp; + int sig; + int i; + + tmp = strtol(signal_name, &ptr, 10); + if (ptr != signal_name) { /* found a number */ + if (xstring_is_whitespace(ptr)) + sig = (int)tmp; + else + return 0; + } else { + ptr = (char *)signal_name; + while (isspace(*ptr)) + ptr++; + if (strncasecmp(ptr, "SIG", 3) == 0) + ptr += 3; + for (i = 0; ; i++) { + if (sig_name[i] == NULL) + return 0; + if (strncasecmp(ptr, sig_name[i], + strlen(sig_name[i])) == 0) { + /* found the signal name */ + if (!xstring_is_whitespace(ptr + + strlen(sig_name[i]))) + return 0; + sig = sig_num[i]; + break; + } + } + } + + return sig; +} diff --git a/src/common/proc_args.h b/src/common/proc_args.h index 2e75bd816325ac612664ce2160f459d4721f6536..201377a0e19f0b931008f333ee5444afe1e71d12 100644 --- a/src/common/proc_args.h +++ b/src/common/proc_args.h @@ -132,4 +132,8 @@ char *print_geometry(const uint16_t *geometry); * RET 0 on success, -1 on failure */ int get_signal_opts(char *optarg, uint16_t *warn_signal, uint16_t *warn_time); +/* Convert a signal name to its numeric equivalent. + * Return 0 on failure */ +int sig_name2num(char *signal_name); + #endif /* !_PROC_ARGS_H */ diff --git a/src/plugins/priority/multifactor/priority_multifactor.c b/src/plugins/priority/multifactor/priority_multifactor.c index 1e114f52018b39071f5733a68d8a4f40bb64864b..aa3bda1aee6bcba4909c29b87cb818e729f209bd 100644 --- a/src/plugins/priority/multifactor/priority_multifactor.c +++ b/src/plugins/priority/multifactor/priority_multifactor.c @@ -794,10 +794,10 @@ static void *_decay_thread(void *no_data) debug4("adding %f new usage to " "assoc %u (user='%s' acct='%s') " "raw usage is now %Lf. 
Group " - "wall added %d making it %d.", + "wall added %d making it %f.", real_decay, assoc->id, assoc->user, assoc->acct, - assoc->usage_raw, run_delta, + assoc->usage_raw, run_decay, assoc->grp_used_wall); assoc = assoc->parent_assoc_ptr; diff --git a/src/plugins/sched/wiki2/job_modify.c b/src/plugins/sched/wiki2/job_modify.c index 44d6aabcfca6ad24412719b44eea7eb1140f4e8e..6f77e3606009bc79295f85438be627fc95a9b069 100644 --- a/src/plugins/sched/wiki2/job_modify.c +++ b/src/plugins/sched/wiki2/job_modify.c @@ -329,15 +329,47 @@ host_fini: if (rc) { if (IS_JOB_PENDING(job_ptr) && job_ptr->details) { job_ptr->details->min_nodes = job_desc.min_nodes; - if (job_ptr->details->max_nodes - && (job_ptr->details->max_nodes < job_desc.min_nodes)) + if (job_ptr->details->max_nodes && + (job_ptr->details->max_nodes < job_desc.min_nodes)) job_ptr->details->max_nodes = job_desc.min_nodes; info("wiki: change job %u min_nodes to %u", jobid, new_node_cnt); #ifdef HAVE_BG +{ + static uint16_t cpus_per_node = 0; + /* Set the block type to NAV here since if the + job size changes from a small block to a + regular size block the block types won't + jive. + */ + uint16_t conn_type = (uint16_t)SELECT_NAV; + /* Also, if geometry is set we need to 0 out + the first part of it so the bluegene plugin + doesn't look at it any more. 
+ */ + uint16_t req_geometry[SYSTEM_DIMENSIONS] = { 0 }; + job_ptr->num_procs = job_desc.num_procs; job_ptr->details->job_min_cpus = job_desc.job_min_cpus; + + if (!cpus_per_node) { + select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT, + &cpus_per_node); + } + new_node_cnt = job_ptr->num_procs; + if (cpus_per_node) + new_node_cnt /= cpus_per_node; + select_g_select_jobinfo_set(job_ptr->select_jobinfo, + SELECT_JOBDATA_NODE_CNT, + &new_node_cnt); + select_g_select_jobinfo_set(job_ptr->select_jobinfo, + SELECT_JOBDATA_CONN_TYPE, + &conn_type); + select_g_select_jobinfo_set(job_ptr->select_jobinfo, + SELECT_JOBDATA_GEOMETRY, + &req_geometry); +} #endif last_job_update = now; update_accounting = true; diff --git a/src/plugins/sched/wiki2/job_will_run.c b/src/plugins/sched/wiki2/job_will_run.c index 62ad4fe6e39829df5fade6c489691b041ea8df83..70b83434aa16f76a09ea9f98ec7a7ae857a6e027 100644 --- a/src/plugins/sched/wiki2/job_will_run.c +++ b/src/plugins/sched/wiki2/job_will_run.c @@ -48,7 +48,6 @@ #define MAX_JOB_QUEUE 20 -static void _preempt_list_del(void *x); static char * _will_run_test(uint32_t jobid, time_t start_time, char *node_list, int *err_code, char **err_msg); static char * _will_run_test2(uint32_t jobid, time_t start_time, @@ -410,7 +409,7 @@ static char * _will_run_test2(uint32_t jobid, time_t start_time, uint32_t *preemptee, int preemptee_cnt, int *err_code, char **err_msg) { - struct job_record *job_ptr = NULL, *pre_ptr, **pre_pptr; + struct job_record *job_ptr = NULL, *pre_ptr; struct part_record *part_ptr; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL; time_t start_res; @@ -531,15 +530,10 @@ static char * _will_run_test2(uint32_t jobid, time_t start_time, } if (preemptee_cnt) { - preemptee_candidates = list_create(_preempt_list_del); + preemptee_candidates = list_create(NULL); for (i=0; i<preemptee_cnt; i++) { - pre_ptr = find_job_record(preemptee[i]); - if (pre_ptr) { - pre_pptr = xmalloc(sizeof(struct - job_record *)); - pre_pptr[0] = pre_ptr; - 
list_append(preemptee_candidates, pre_pptr); - } + if ((pre_ptr = find_job_record(preemptee[i]))) + list_append(preemptee_candidates, pre_ptr); } } @@ -570,15 +564,14 @@ static char * _will_run_test2(uint32_t jobid, time_t start_time, xfree(hostlist); if (preempted_jobs) { - while ((pre_pptr = list_pop(preempted_jobs))) { + while ((pre_ptr = list_pop(preempted_jobs))) { if (pre_cnt++) sep = ","; else sep = " PREEMPT="; snprintf(tmp_str, sizeof(tmp_str), "%s%u", - sep, pre_pptr[0]->job_id); + sep, pre_ptr->job_id); xstrcat(reply_msg, tmp_str); - xfree(pre_pptr); } list_destroy(preempted_jobs); } @@ -594,11 +587,6 @@ static char * _will_run_test2(uint32_t jobid, time_t start_time, return reply_msg; } -static void _preempt_list_del(void *x) -{ - xfree(x); -} - /* * bitmap2wiki_node_name - given a bitmap, build a list of colon separated * node names (if we can't use node range expressions), or the diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index 45c1f15815442ce1d13b65940200f22a8d842ab0..1bf7220d29f2aa3f7ebe66623a431d32775755fb 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * block_allocator.c - Assorted functions for layout of bluegene blocks, * wiring, mapping for smap, etc. - * $Id: block_allocator.c 19402 2010-02-04 01:02:41Z jette $ + * $Id: block_allocator.c 20043 2010-04-16 00:29:44Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. @@ -1348,7 +1348,7 @@ extern int allocate_block(ba_request_t* ba_request, List results) * Admin wants to remove a previous allocation. 
* will allow Admin to delete a previous allocation retrival by letter code. */ -extern int remove_block(List nodes, int new_count) +extern int remove_block(List nodes, int new_count, int conn_type) { int dim; ba_node_t* curr_ba_node = NULL; @@ -1371,6 +1371,10 @@ extern int remove_block(List nodes, int new_count) ba_node->used = false; ba_node->color = 7; ba_node->letter = '.'; + /* Small blocks don't use wires, and only have 1 node, + so just break. */ + if(conn_type == SELECT_SMALL) + break; for(dim=0;dim<BA_SYSTEM_DIMENSIONS;dim++) { curr_switch = &ba_node->axis_switch[dim]; if(curr_switch->int_wire[0].used) { @@ -1440,7 +1444,7 @@ extern int redo_block(List nodes, int *geo, int conn_type, int new_count) if(!ba_node) return SLURM_ERROR; - remove_block(nodes, new_count); + remove_block(nodes, new_count, conn_type); list_delete_all(nodes, &empty_null_destroy_list, ""); name = set_bg_block(nodes, ba_node->coord, geo, conn_type); @@ -1646,7 +1650,7 @@ extern char *set_bg_block(List results, int *start, if(!found) { debug2("trying less efficient code"); - remove_block(results, color_count); + remove_block(results, color_count, conn_type); list_delete_all(results, &empty_null_destroy_list, ""); list_append(results, ba_node); found = _find_x_path(results, ba_node, @@ -3440,7 +3444,10 @@ static int _reset_the_path(ba_switch_t *curr_switch, int source, } /*set the switch to not be used */ if(!curr_switch->int_wire[source].used) { - debug("I reached the end, the source isn't used"); + /* This means something overlapping the removing block + already cleared this, or the path just never was + complete in the first place. 
*/ + debug3("I reached the end, the source isn't used"); return 1; } curr_switch->int_wire[source].used = 0; @@ -3704,7 +3711,8 @@ start_again: } if(results) { - remove_block(results, color_count); + remove_block(results, color_count, + ba_request->conn_type); list_delete_all(results, &empty_null_destroy_list, ""); } diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.h b/src/plugins/select/bluegene/block_allocator/block_allocator.h index beeb861b778fad13f5bc55966f5cc5b911ec4e47..b5af1bf23aa53e089a48ec9d60fc873aa06e60bc 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.h +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.h @@ -409,7 +409,7 @@ extern int allocate_block(ba_request_t* ba_request, List results); * Admin wants to remove a previous allocation. * will allow Admin to delete a previous allocation retrival by letter code. */ -extern int remove_block(List nodes, int new_count); +extern int remove_block(List nodes, int new_count, int conn_type); /* * Admin wants to change something about a previous allocation. diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 2b4476a9914b993f11b668b89466936a35ca00f7..acd5c679d349e306f2677bcfc8034878b53f2529 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -943,7 +943,7 @@ static int _find_best_block_match(List block_list, request.avail_node_bitmap = slurm_block_bitmap; select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_MAX_CPUS, &max_cpus); + SELECT_JOBDATA_MAX_CPUS, &max_cpus); /* since we only look at procs after this and not nodes we * need to set a max_cpus if given */ @@ -994,12 +994,19 @@ static int _find_best_block_match(List block_list, bg_record->bg_block_id, 1) != SLURM_SUCCESS) { /* check_block_bp_states will - already set things in an - error state, so we don't - have to do that here. 
+ set this block in the main + list to an error state, but + we aren't looking + at the main list, so we + need to set this copy of + the block in an + error state as well. */ + bg_record->job_running = + BLOCK_ERROR_STATE; + bg_record->state = RM_PARTITION_ERROR; error("_find_best_block_match: Picked " - "block had some issues with " + "block (%s) had some issues with " "hardware, trying a different " "one.", bg_record->bg_block_id); diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index b702ca459fe9a58b3fa92d97e46fcc95c7803bfd..d69e4addda3c0623c4e21230326786b0bf04f6e8 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -2,7 +2,7 @@ * bg_job_run.c - blue gene job execution (e.g. initiation and termination) * functions. * - * $Id: bg_job_run.c 19700 2010-03-08 18:50:55Z da $ + * $Id: bg_job_run.c 20011 2010-04-13 19:10:21Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -96,7 +96,7 @@ static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER; static int agent_cnt = 0; #ifdef HAVE_BG_FILES -static int _remove_job(db_job_id_t job_id); +static int _remove_job(db_job_id_t job_id, char *block_id); #endif static void _bg_list_del(void *x); @@ -113,14 +113,15 @@ static void _term_agent(bg_update_t *bg_update_ptr); #ifdef HAVE_BG_FILES /* Kill a job and remove its record from MMCS */ -static int _remove_job(db_job_id_t job_id) +static int _remove_job(db_job_id_t job_id, char *block_id) { int rc; int count = 0; rm_job_t *job_rec = NULL; rm_job_state_t job_state; + bool is_history = false; - debug("removing job %d from MMCS", job_id); + debug("removing job %d from MMCS on block %s", job_id, block_id); while(1) { if (count) sleep(POLL_INTERVAL); @@ -152,22 +153,46 @@ static int _remove_job(db_job_id_t job_id) continue; } + /* If this job is in the history table we + should just exit here since it is marked + incorrectly */ + if ((rc = bridge_get_data(job_rec, RM_JobInHist, + &is_history)) + != STATUS_OK) { + (void) bridge_free_job(job_rec); + if (rc == JOB_NOT_FOUND) { + debug("job %d removed from MMCS", job_id); + return STATUS_OK; + } + + error("bridge_get_data(RM_JobInHist) for jobid=%d " + "%s", job_id, bg_err_str(rc)); + continue; + } + if ((rc = bridge_free_job(job_rec)) != STATUS_OK) error("bridge_free_job: %s", bg_err_str(rc)); - debug2("job %d is in state %d", job_id, job_state); + debug2("job %d on block %s is in state %d history %d", + job_id, block_id, job_state, is_history); /* check the state and process accordingly */ - if(job_state == RM_JOB_TERMINATED) + if(is_history) { + debug2("Job %d on block %s isn't in the " + "active job table anymore, final state was %d", + job_id, block_id, job_state); + return STATUS_OK; + } else if(job_state == RM_JOB_TERMINATED) return STATUS_OK; else if(job_state == RM_JOB_DYING) { if(count > MAX_POLL_RETRIES) - error("Job %d isn't dying, trying for " - "%d seconds", job_id, - 
count*POLL_INTERVAL); + error("Job %d on block %s isn't dying, " + "trying for %d seconds", job_id, + block_id, count*POLL_INTERVAL); continue; } else if(job_state == RM_JOB_ERROR) { - error("job %d is in a error state.", job_id); + error("job %d on block %s is in a error state.", + job_id, block_id); //free_bg_block(); return STATUS_OK; @@ -179,7 +204,10 @@ static int _remove_job(db_job_id_t job_id) * process gets killed with a SIGTERM. In the case of * bridge_cancel_job it always gets killed with a * SIGKILL. From IBM's point of view that is a bad - * deally, so we are going to use signal ;). + * deally, so we are going to use signal ;). Sending + * a SIGKILL will kill the mpirun front end process, + * and if you kill that jobs will never get cleaned up and + * you end up with ciod unreachable on the next job. */ // rc = bridge_cancel_job(job_id); @@ -187,16 +215,23 @@ static int _remove_job(db_job_id_t job_id) if (rc != STATUS_OK) { if (rc == JOB_NOT_FOUND) { - debug("job %d removed from MMCS", job_id); + debug("job %d on block %s removed from MMCS", + job_id, block_id); return STATUS_OK; } if(rc == INCOMPATIBLE_STATE) - debug("job %d is in an INCOMPATIBLE_STATE", - job_id); + debug("job %d on block %s is in an " + "INCOMPATIBLE_STATE", + job_id, block_id); else error("bridge_signal_job(%d): %s", job_id, bg_err_str(rc)); - } + } else if(count > MAX_POLL_RETRIES) + error("Job %d on block %s is in state %d and " + "isn't dying, and doesn't appear to be " + "responding to SIGTERM, trying for %d seconds", + job_id, block_id, job_state, count*POLL_INTERVAL); + } error("Failed to remove job %d from MMCS", job_id); @@ -378,7 +413,7 @@ static void _remove_jobs_on_block_and_reset(rm_job_list_t *job_list, continue; } debug2("got job_id %d",job_id); - if((rc = _remove_job(job_id)) == INTERNAL_ERROR) { + if((rc = _remove_job(job_id, block_id)) == INTERNAL_ERROR) { job_remove_failed = 1; break; } diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c 
b/src/plugins/select/bluegene/plugin/bg_record_functions.c index 774370597760d7eda4063c0a8f54c160efa5fa73..fe79d6e5f42c6b6917e80fde1786f3cb362db2ae 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -493,10 +493,15 @@ extern int bg_record_cmpf_inc(bg_record_t* rec_a, bg_record_t* rec_b) int size_a = rec_a->node_cnt; int size_b = rec_b->node_cnt; - if (size_a < size_b) - return -1; - else if (size_a > size_b) - return 1; + /* We only look at this if we are ordering blocks larger than + * a midplane, order of ionodes is how we order otherwise. */ + if((size_a >= bg_conf->bp_node_cnt) + || (size_b >= bg_conf->bp_node_cnt)) { + if (size_a < size_b) + return -1; + else if (size_a > size_b) + return 1; + } if(rec_a->nodes && rec_b->nodes) { size_a = strcmp(rec_a->nodes, rec_b->nodes); diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c index 264756d2434cee551323f232bc11d7a4a684b8b6..093a300035dad01b45e963b340b698b5accb4deb 100755 --- a/src/plugins/select/bluegene/plugin/block_sys.c +++ b/src/plugins/select/bluegene/plugin/block_sys.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * block_sys.c - component used for wiring up the blocks * - * $Id: block_sys.c 19849 2010-03-23 19:10:13Z da $ + * $Id: block_sys.c 19995 2010-04-09 20:55:46Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -267,7 +267,8 @@ static int _post_allocate(bg_record_t *bg_record) #ifdef HAVE_BG_FILES #ifdef HAVE_BGL -static int _find_nodecard(rm_partition_t *block_ptr, int *nc_id) +extern int find_nodecard_num(rm_partition_t *block_ptr, rm_nodecard_t *ncard, + int *nc_id) { char *my_card_name = NULL; char *card_name = NULL; @@ -276,19 +277,12 @@ static int _find_nodecard(rm_partition_t *block_ptr, int *nc_id) int i=0; int rc; rm_nodecard_list_t *ncard_list = NULL; - rm_nodecard_t *ncard = NULL; rm_BP_t *curr_bp = NULL; + rm_nodecard_t *ncard2; xassert(block_ptr); xassert(nc_id); - if((rc = bridge_get_data(block_ptr, - RM_PartitionFirstNodeCard, - &ncard)) - != STATUS_OK) { - error("bridge_get_data(RM_FirstCard): %s", - bg_err_str(rc)); - } if((rc = bridge_get_data(ncard, RM_NodeCardID, &my_card_name)) @@ -330,7 +324,7 @@ static int _find_nodecard(rm_partition_t *block_ptr, int *nc_id) if ((rc = bridge_get_data(ncard_list, RM_NodeCardListNext, - &ncard)) != STATUS_OK) { + &ncard2)) != STATUS_OK) { error("bridge_get_data" "(RM_NodeCardListNext): %s", rc); @@ -340,7 +334,7 @@ static int _find_nodecard(rm_partition_t *block_ptr, int *nc_id) } else { if ((rc = bridge_get_data(ncard_list, RM_NodeCardListFirst, - &ncard)) != STATUS_OK) { + &ncard2)) != STATUS_OK) { error("bridge_get_data" "(RM_NodeCardListFirst: %s", rc); @@ -348,7 +342,7 @@ static int _find_nodecard(rm_partition_t *block_ptr, int *nc_id) goto cleanup; } } - if ((rc = bridge_get_data(ncard, + if ((rc = bridge_get_data(ncard2, RM_NodeCardID, &card_name)) != STATUS_OK) { error("bridge_get_data(RM_NodeCardID: %s", @@ -593,7 +587,7 @@ int read_bg_blocks(List curr_block_list) nc_id = 0; if(nc_cnt == 1) - _find_nodecard(block_ptr, &nc_id); + find_nodecard_num(block_ptr, ncard, &nc_id); bg_record->node_cnt = nc_cnt * bg_conf->nodecard_node_cnt; diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index 
55f21f788abffa89f75d1aadcef5d45bb6735cf5..bff5f46f14ce6b26b326e53bd191ebe7aed1d598 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.h - header for blue gene configuration processing module. * - * $Id: bluegene.h 19095 2009-12-01 22:59:18Z da $ + * $Id: bluegene.h 19995 2010-04-09 20:55:46Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -168,6 +168,12 @@ extern int validate_current_blocks(char *dir); /* block_sys.c */ /*****************************************************/ +#ifdef HAVE_BG_FILES +#ifdef HAVE_BGL +extern int find_nodecard_num(rm_partition_t *block_ptr, rm_nodecard_t *ncard, + int *nc_id); +#endif +#endif extern int configure_block(bg_record_t * bg_conf_record); extern int read_bg_blocks(); extern int load_state_file(List curr_block_list, char *dir_name); diff --git a/src/plugins/select/bluegene/plugin/dynamic_block.c b/src/plugins/select/bluegene/plugin/dynamic_block.c index 627a6c045a8ab5626b77b5e52ed1423d8a42821e..bca33d8e548299edfd6e0b77080a49a3f6bb65bb 100644 --- a/src/plugins/select/bluegene/plugin/dynamic_block.c +++ b/src/plugins/select/bluegene/plugin/dynamic_block.c @@ -271,7 +271,8 @@ extern List create_dynamic_block(List block_list, debug3("removing %s for request %d", bg_record->nodes, request->size); - remove_block(bg_record->bg_block_list, (int)NO_VAL); + remove_block(bg_record->bg_block_list, (int)NO_VAL, + (int)bg_record->conn_type); /* need to set any unusable nodes that this last block used */ removable_set_bps(unusable_nodes); @@ -404,7 +405,8 @@ extern bg_record_t *create_small_record(bg_record_t *bg_record, found_record->ionode_bitmap = bit_copy(ionodes); bit_fmt(bitstring, BITSIZE, 
found_record->ionode_bitmap); found_record->ionodes = xstrdup(bitstring); - + debug4("made small block of %s[%s]", + found_record->nodes, found_record->ionodes); return found_record; } @@ -608,6 +610,7 @@ static int _breakup_blocks(List block_list, List new_blocks, char tmp_char[256]; bitstr_t *ionodes = bit_alloc(bg_conf->numpsets); int cnodes = request->procs / bg_conf->cpu_ratio; + int curr_bp_bit = -1; debug2("proc count = %d cnodes = %d size = %d", request->procs, cnodes, request->size); @@ -676,6 +679,36 @@ static int _breakup_blocks(List block_list, List new_blocks, if(bg_record->node_cnt < cnodes) { char bitstring[BITSIZE]; bitstr_t *bitstr = NULL; + int num_over = 0; + int num_cnodes = bg_record->node_cnt; + int rec_bp_bit = bit_ffs(bg_record->bitmap); + + if(curr_bp_bit != rec_bp_bit) { + /* Got a different node than + * previously, since the list should + * be in order of nodes for small blocks + * just clear here since the last node + * doesn't have any more. */ + curr_bp_bit = rec_bp_bit; + bit_nclear(ionodes, 0, (bg_conf->numpsets-1)); + total_cnode_cnt = 0; + } + + /* On really busy systems we can get + overlapping blocks here. If that is the + case only add that which doesn't overlap. + */ + if((num_over = bit_overlap( + ionodes, bg_record->ionode_bitmap))) { + /* Since the smallest block size is + the number of cnodes in an io node, + just multiply the num_over by that to + get the number of cnodes to remove. 
+ */ + if((num_cnodes -= + num_over * bg_conf->smallest_block) <= 0) + continue; + } bit_or(ionodes, bg_record->ionode_bitmap); /* check and see if the bits set are a valid @@ -690,15 +723,16 @@ static int _breakup_blocks(List block_list, List new_blocks, if(!bitstr) { bit_nclear(ionodes, 0, (bg_conf->numpsets-1)); bit_or(ionodes, bg_record->ionode_bitmap); - total_cnode_cnt = bg_record->node_cnt; + total_cnode_cnt = num_cnodes = + bg_record->node_cnt; } else - total_cnode_cnt += bg_record->node_cnt; + total_cnode_cnt += num_cnodes; bit_fmt(bitstring, BITSIZE, ionodes); - debug2("1 adding %s %d got %d set " + debug2("1 adding %s %s %d got %d set " "ionodes %s total is %s", - bg_record->bg_block_id, - bg_record->node_cnt, total_cnode_cnt, + bg_record->bg_block_id, bg_record->nodes, + num_cnodes, total_cnode_cnt, bg_record->ionodes, bitstring); if(total_cnode_cnt == cnodes) { request->save_name = xstrdup_printf( diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c index 600de3398656829abdffcd25bab907cebb58a772..03c3595bcb77899c99fc8611dea5afe4d049231c 100644 --- a/src/plugins/select/bluegene/plugin/state_test.c +++ b/src/plugins/select/bluegene/plugin/state_test.c @@ -2,7 +2,7 @@ * state_test.c - Test state of Bluegene base partitions and switches. * DRAIN nodes in SLURM that are not usable. * - * $Id: state_test.c 19755 2010-03-16 19:15:43Z da $ + * $Id: state_test.c 19998 2010-04-12 19:18:46Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. 
@@ -122,6 +122,17 @@ static void _configure_node_down(rm_bp_id_t bp_id, my_bluegene_t *my_bg) continue; } + /* make sure we have this midplane in the system */ + if(bp_loc.X >= DIM_SIZE[X] + || bp_loc.Y >= DIM_SIZE[Y] + || bp_loc.Z >= DIM_SIZE[Z]) { + debug4("node %s%c%c%c isn't configured", + bg_conf->slurm_node_prefix, + alpha_num[bp_loc.X], alpha_num[bp_loc.Y], + alpha_num[bp_loc.Z]); + continue; + } + snprintf(bg_down_node, sizeof(bg_down_node), "%s%c%c%c", bg_conf->slurm_node_prefix, alpha_num[bp_loc.X], alpha_num[bp_loc.Y], @@ -141,6 +152,136 @@ static void _configure_node_down(rm_bp_id_t bp_id, my_bluegene_t *my_bg) } } +static char *_get_bp_node_name(rm_BP_t *bp_ptr) +{ + rm_location_t bp_loc; + int rc; + + errno = SLURM_SUCCESS; + + if ((rc = bridge_get_data(bp_ptr, RM_BPLoc, &bp_loc)) + != STATUS_OK) { + error("bridge_get_data(RM_BPLoc): %s", bg_err_str(rc)); + errno = SLURM_ERROR; + return NULL; + } + + /* make sure we have this midplane in the system */ + if(bp_loc.X >= DIM_SIZE[X] + || bp_loc.Y >= DIM_SIZE[Y] + || bp_loc.Z >= DIM_SIZE[Z]) { + debug4("node %s%c%c%c isn't configured", + bg_conf->slurm_node_prefix, + alpha_num[bp_loc.X], alpha_num[bp_loc.Y], + alpha_num[bp_loc.Z]); + return NULL; + } + + return xstrdup_printf("%s%c%c%c", + bg_conf->slurm_node_prefix, + alpha_num[bp_loc.X], alpha_num[bp_loc.Y], + alpha_num[bp_loc.Z]); +} + +/* To fake a nodecard down do this on the service node. +db2 "update bg{l|p}nodecard set status = 'E' where location = +'Rxx-Mx-Nx' and status='A'" +Reverse the A, and E to bring it back up. 
+*/ +static int _test_nodecard_state(rm_nodecard_t *ncard, int nc_id, + char *node_name, bool slurmctld_locked) +{ + int rc = SLURM_SUCCESS; + rm_nodecard_id_t nc_name = NULL; + rm_nodecard_state_t state; + int io_start = 0; + + if ((rc = bridge_get_data(ncard, + RM_NodeCardState, + &state)) != STATUS_OK) { + error("bridge_get_data(RM_NodeCardState: %s", + rc); + return SLURM_ERROR; + } + + if(state == RM_NODECARD_UP) + return SLURM_SUCCESS; + + if ((rc = bridge_get_data(ncard, + RM_NodeCardID, + &nc_name)) != STATUS_OK) { + error("bridge_get_data(RM_NodeCardID): %d", rc); + return SLURM_ERROR; + } + + if(!nc_name) { + error("We didn't get an RM_NodeCardID but rc was STATUS_OK?"); + return SLURM_ERROR; + } + +#ifdef HAVE_BGL + if ((rc = bridge_get_data(ncard, + RM_NodeCardQuarter, + &io_start)) != STATUS_OK) { + error("bridge_get_data(CardQuarter): %d", rc); + rc = SLURM_ERROR; + goto clean_up; + } + io_start *= bg_conf->quarter_ionode_cnt; + io_start += bg_conf->nodecard_ionode_cnt * (nc_id%4); +#else + /* From the first nodecard id we can figure + out where to start from with the alloc of ionodes. + */ + io_start = atoi((char*)nc_name+1); + io_start *= bg_conf->io_ratio; +#endif + /* On small systems with less than a midplane the + database may see the nodecards there but in missing + state. To avoid getting a bunch of warnings here just + skip over the ones missing. 
+ */ + if(io_start >= bg_conf->numpsets) { + rc = SLURM_SUCCESS; + if(state == RM_NODECARD_MISSING) { + debug3("Nodecard %s is missing", + nc_name); + } else { + error("We don't have the system configured " + "for this nodecard %s, we only have " + "%d ionodes and this starts at %d", + nc_name, io_start, bg_conf->numpsets); + } + goto clean_up; + } + + /* if(!ionode_bitmap) */ + /* ionode_bitmap = bit_alloc(bg_conf->numpsets); */ + /* info("setting %s start %d of %d", */ + /* nc_name, io_start, bg_conf->numpsets); */ + /* bit_nset(ionode_bitmap, io_start, io_start+io_cnt); */ + + /* we have to handle each nodecard separately to make + sure we don't create holes in the system */ + if(down_nodecard(node_name, io_start, slurmctld_locked) + == SLURM_SUCCESS) { + debug("nodecard %s on %s is in an error state", + nc_name, node_name); + } else + debug2("nodecard %s on %s is in an error state, " + "but error was returned when trying to make it so", + nc_name, node_name); + + /* Here we want to keep track of any nodecard that + isn't up and return error if it is in the system. 
*/ + rc = SLURM_ERROR; + +clean_up: + + free(nc_name); + return rc; +} + /* * This could potentially lock the node lock in the slurmctld with * slurm_drain_node, so if nodes_locked is called we will call the @@ -149,17 +290,14 @@ static void _configure_node_down(rm_bp_id_t bp_id, my_bluegene_t *my_bg) static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked) { rm_bp_id_t bp_id = NULL; - rm_nodecard_id_t nc_name = NULL; int num = 0; int marked_down = 0; int i=0; int rc = SLURM_SUCCESS; rm_nodecard_list_t *ncard_list = NULL; rm_nodecard_t *ncard = NULL; - rm_nodecard_state_t state; //bitstr_t *ionode_bitmap = NULL; //bg_record_t *bg_record = NULL; - int *coord = NULL; char *node_name = NULL; //int bp_bit = 0; //int io_cnt = 1; @@ -183,28 +321,7 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked) goto clean_up; } - coord = find_bp_loc(bp_id); - if(!coord) { - error("Could not find coordinates for " - "BP ID %s", (char *) bp_id); - rc = SLURM_ERROR; - goto clean_up; - } - - /* make sure we have this midplane in the system */ - if(coord[X] >= DIM_SIZE[X] - || coord[Y] >= DIM_SIZE[Y] - || coord[Z] >= DIM_SIZE[Z]) { - debug4("node %s isn't configured", bp_id); - rc = SLURM_SUCCESS; - goto clean_up; - } - - node_name = xstrdup_printf("%s%c%c%c", - bg_conf->slurm_node_prefix, - alpha_num[coord[X]], - alpha_num[coord[Y]], - alpha_num[coord[Z]]); + node_name = _get_bp_node_name(bp_ptr); if((rc = bridge_get_data(ncard_list, RM_NodeCardListSize, &num)) != STATUS_OK) { @@ -215,8 +332,6 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked) } for(i=0; i<num; i++) { - int io_start = 0; - if (i) { if ((rc = bridge_get_data(ncard_list, RM_NodeCardListNext, @@ -238,83 +353,10 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr, bool slurmctld_locked) goto clean_up; } } - if ((rc = bridge_get_data(ncard, - RM_NodeCardState, - &state)) != STATUS_OK) { - error("bridge_get_data(RM_NodeCardState: %s", - rc); - rc = SLURM_ERROR; - 
goto clean_up; - } - - if(state == RM_NODECARD_UP) - continue; - - /* Here we want to keep track of any nodecard that - isn't up and return error if this is not 0 since - we could be checking to see if we could run here. */ - marked_down++; - - if ((rc = bridge_get_data(ncard, - RM_NodeCardID, - &nc_name)) != STATUS_OK) { - error("bridge_get_data(RM_NodeCardID): %d",rc); - rc = SLURM_ERROR; - goto clean_up; - } - - if(!nc_name) { - rc = SLURM_ERROR; - goto clean_up; - } -#ifdef HAVE_BGL - if ((rc = bridge_get_data(ncard, - RM_NodeCardQuarter, - &io_start)) != STATUS_OK) { - error("bridge_get_data(CardQuarter): %d",rc); - goto clean_up; - } - io_start *= bg_conf->quarter_ionode_cnt; - io_start += bg_conf->nodecard_ionode_cnt * (i%4); -#else - /* From the first nodecard id we can figure - out where to start from with the alloc of ionodes. - */ - io_start = atoi((char*)nc_name+1); - io_start *= bg_conf->io_ratio; -#endif - /* On small systems with less than a midplane the - database may see the nodecards there but in missing - state. To avoid getting a bunch of warnings here just - skip over the ones missing. 
- */ - if(io_start >= bg_conf->numpsets) { - if(state == RM_NODECARD_MISSING) { - debug3("Nodecard %s is missing continue", - nc_name); - } else { - error("We don't have the system configured " - "for this nodecard %s, we only have " - "%d ionodes and this starts at %d", - nc_name, io_start, bg_conf->numpsets); - } - free(nc_name); - continue; - } -/* if(!ionode_bitmap) */ -/* ionode_bitmap = bit_alloc(bg_conf->numpsets); */ -/* info("setting %s start %d of %d", */ -/* nc_name, io_start, bg_conf->numpsets); */ -/* bit_nset(ionode_bitmap, io_start, io_start+io_cnt); */ - /* we have to handle each nodecard separately to make - sure we don't create holes in the system */ - if(down_nodecard(node_name, io_start, slurmctld_locked) - == SLURM_SUCCESS) { - debug("nodecard %s on %s is in an error state", - nc_name, node_name); - } - free(nc_name); + if(_test_nodecard_state(ncard, i, node_name, slurmctld_locked) + != SLURM_SUCCESS) + marked_down++; } /* this code is here to bring up a block after it is in an @@ -374,6 +416,7 @@ clean_up: /* If we marked any nodecard down we need to state it here */ if((rc == SLURM_SUCCESS) && marked_down) rc = SLURM_ERROR; + return rc; } @@ -523,8 +566,9 @@ extern int check_block_bp_states(char *bg_block_id, bool slurmctld_locked) #ifdef HAVE_BG_FILES rm_partition_t *block_ptr = NULL; rm_BP_t *bp_ptr = NULL; - int bp_cnt = 0; + int cnt = 0; int i = 0; + bool small = false; if ((rc = bridge_get_block(bg_block_id, &block_ptr)) != STATUS_OK) { error("Block %s doesn't exist.", bg_block_id); @@ -534,14 +578,103 @@ extern int check_block_bp_states(char *bg_block_id, bool slurmctld_locked) } - if ((rc = bridge_get_data(block_ptr, RM_PartitionBPNum, &bp_cnt)) + if ((rc = bridge_get_data(block_ptr, RM_PartitionSmall, &small)) + != STATUS_OK) { + error("bridge_get_data(RM_PartitionSmall): %s", + bg_err_str(rc)); + rc = SLURM_ERROR; + + goto cleanup; + } + + if(small) { + rm_nodecard_t *ncard = NULL; + char *node_name = NULL; + + /* If this is a small 
block we can just check the + nodecard list of the block. + */ + if((rc = bridge_get_data(block_ptr, + RM_PartitionNodeCardNum, + &cnt)) + != STATUS_OK) { + error("bridge_get_data(RM_PartitionNodeCardNum): %s", + bg_err_str(rc)); + rc = SLURM_ERROR; + goto cleanup; + } + + if ((rc = bridge_get_data(block_ptr, + RM_PartitionFirstBP, + &bp_ptr)) + != STATUS_OK) { + error("bridge_get_data(RM_FirstBP): %s", + bg_err_str(rc)); + rc = SLURM_ERROR; + goto cleanup; + } + + if(!(node_name = _get_bp_node_name(bp_ptr))) { + rc = errno; + goto cleanup; + } + + for(i=0; i<cnt; i++) { + int nc_id = 0; + if(i) { + if ((rc = bridge_get_data( + block_ptr, + RM_PartitionNextNodeCard, + &ncard)) + != STATUS_OK) { + error("bridge_get_data(" + "RM_PartitionNextNodeCard): %s", + bg_err_str(rc)); + rc = SLURM_ERROR; + break; + } + } else { + if ((rc = bridge_get_data( + block_ptr, + RM_PartitionFirstNodeCard, + &ncard)) + != STATUS_OK) { + error("bridge_get_data(" + "RM_PartitionFirstNodeCard): %s", + bg_err_str(rc)); + rc = SLURM_ERROR; + break; + } + } +#ifdef HAVE_BGL + find_nodecard_num(block_ptr, ncard, &nc_id); +#endif + /* If we find any nodecards in an error state just + break here since we are seeing if we can run. If + any nodecard is down this can't happen. + */ + if(_test_nodecard_state( + ncard, nc_id, node_name, slurmctld_locked) + != SLURM_SUCCESS) { + rc = SLURM_ERROR; + break; + } + } + xfree(node_name); + goto cleanup; + } + + /* If this isn't a small block we have to check the list of + nodecards on each midplane. + */ + if ((rc = bridge_get_data(block_ptr, RM_PartitionBPNum, &cnt)) != STATUS_OK) { error("bridge_get_data(RM_BPNum): %s", bg_err_str(rc)); rc = SLURM_ERROR; goto cleanup; } - for(i=0; i<bp_cnt; i++) { + for(i=0; i<cnt; i++) { if(i) { if ((rc = bridge_get_data(block_ptr, RM_PartitionNextBP, @@ -568,9 +701,11 @@ extern int check_block_bp_states(char *bg_block_id, bool slurmctld_locked) break here since we are seeing if we can run. 
If any nodecard is down this can't happen. */ - if((rc = _test_down_nodecards(bp_ptr, slurmctld_locked)) - != SLURM_SUCCESS) + if(_test_down_nodecards(bp_ptr, slurmctld_locked) + != SLURM_SUCCESS) { + rc = SLURM_ERROR; break; + } } cleanup: diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 0fa4b6cd5638188ec721a0cff8a1e47589e60da6..596577adf6c5f04c4e250c52483fe590ddd4df3b 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -1794,8 +1794,6 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, "idle resources found"); goto alloc_job; } - debug3("cons_res: cr_job_test: test 1 fail - " - "not enough idle resources"); if (job_node_req == NODE_CR_ONE_ROW) { /* This job CANNOT share CPUs regardless of priority, @@ -1803,8 +1801,12 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, * addressed in _verify_node_state() and job preemption * removes jobs from simulated resource allocation map * before this point. 
*/ + debug3("cons_res: cr_job_test: test 1 fail - " + "no idle resources available"); goto alloc_job; } + debug3("cons_res: cr_job_test: test 1 fail - " + "not enough idle resources"); /*** Step 2 ***/ bit_copybits(bitmap, orig_map); diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index ef2e047a568a220c264516e86faffa210e6899d9..62500a7b125fe9728b1a3fd92785f37d64fc553b 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -1087,9 +1087,8 @@ static uint16_t _get_job_node_req(struct job_record *job_ptr) if (max_share == 0) /* Partition Shared=EXCLUSIVE */ return NODE_CR_RESERVED; - /* Partition Shared=FORCE with count > 1 */ - if ((max_share & SHARED_FORCE) && - ((max_share & (~SHARED_FORCE)) > 1)) + /* Partition is Shared=FORCE */ + if (max_share & SHARED_FORCE) return NODE_CR_AVAILABLE; /* Partition is Shared=NO or Shared=YES */ diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 5b6d23a90eaf00fe3a200be110eec97db94dedef..aab3a2c759e0f17a4c8d68debc55b54f13ae3dc7 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -188,7 +188,6 @@ static bool _opt_verify(void); static void _proc_get_user_env(char *optarg); static void _process_env_var(env_vars_t *e, const char *val); -static int _parse_signal(const char *signal_name); static void _usage(void); /*---[ end forward declarations of static functions ]---------------------*/ @@ -737,9 +736,11 @@ void set_options(const int argc, char **argv) break; case 'K': /* argument is optional */ if (optarg) { - opt.kill_command_signal =_parse_signal(optarg); - if (opt.kill_command_signal == 0) + opt.kill_command_signal = sig_name2num(optarg); + if (opt.kill_command_signal == 0) { + error("Invalid signal name %s", optarg); exit(error_exit); + } } opt.kill_command_signal_set = true; break; @@ -1564,61 +1565,6 @@ static char *print_constraints() return buf; } -/* - * Takes a string containing the number or name of 
a signal and returns - * the signal number. The signal name is case insensitive, and may be of - * the form "SIGHUP" or just "HUP". - * - * Allowed signal names are HUP, INT, QUIT, KILL, TERM, USR1, USR2, and CONT. - */ -static int _parse_signal(const char *signal_name) -{ - char *sig_name[] = {"HUP", "INT", "QUIT", "KILL", "TERM", - "USR1", "USR2", "CONT", NULL}; - int sig_num[] = {SIGHUP, SIGINT, SIGQUIT, SIGKILL, SIGTERM, - SIGUSR1, SIGUSR2, SIGCONT}; - char *ptr; - long tmp; - int sig; - int i; - - tmp = strtol(signal_name, &ptr, 10); - if (ptr != signal_name) { /* found a number */ - if (xstring_is_whitespace(ptr)) { - sig = (int)tmp; - } else { - goto fail; - } - } else { - ptr = (char *)signal_name; - while (isspace(*ptr)) - ptr++; - if (strncasecmp(ptr, "SIG", 3) == 0) - ptr += 3; - for (i = 0; ; i++) { - if (sig_name[i] == NULL) { - goto fail; - } - if (strncasecmp(ptr, sig_name[i], - strlen(sig_name[i])) == 0) { - /* found the signal name */ - if (!xstring_is_whitespace( - ptr + strlen(sig_name[i]))) { - goto fail; - } - sig = sig_num[i]; - break; - } - } - } - - return sig; - -fail: - error("\"%s\" is not a valid signal", signal_name); - return 0; -} - #define tf_(b) (b == true) ? 
"true" : "false" static void _opt_list() diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 2b05f2078b4a6eaaa052fbae265b3fddb58c25e0..567f6189323aa9ce0d9eecdb5866d5371ec50dc0 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -491,6 +491,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->begin_time = opt.begin; if (opt.account) desc->account = xstrdup(opt.account); + if (opt.acctg_freq >= 0) + desc->acctg_freq = opt.acctg_freq; if (opt.comment) desc->comment = xstrdup(opt.comment); if (opt.qos) diff --git a/src/sinfo/opts.c b/src/sinfo/opts.c index 5b618ffa097097efbd33b8cf6a2f07df15233c76..7de33968f0975c62caef09da85e63e9dc8c0dcc0 100644 --- a/src/sinfo/opts.c +++ b/src/sinfo/opts.c @@ -232,7 +232,7 @@ extern void parse_command_line(int argc, char *argv[]) #ifdef HAVE_BG params.format = "%9P %.5a %.10l %.32F %N"; #else - params.format = "%9P %.5a %.10l %.15F %N"; + params.format = "%9P %.5a %.10l %.16F %N"; #endif } else if ( params.node_flag ) { params.node_field_flag = true; /* compute size later */ diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c index 49c6c40499011b5154f35cdd9028f7c7d2199f9b..108a069855c32d740bb3a9fced1be66dbbbad6e4 100644 --- a/src/sinfo/sinfo.c +++ b/src/sinfo/sinfo.c @@ -59,10 +59,6 @@ struct sinfo_parameters params; static int g_node_scaling = 1; -#ifdef HAVE_BG -static int cpus_per_node = 1; -#endif - /************ * Funtions * ************/ @@ -76,7 +72,7 @@ static sinfo_data_t *_create_sinfo(partition_info_t* part_ptr, static bool _filter_out(node_info_t *node_ptr); static void _sinfo_list_delete(void *data); static bool _match_node_data(sinfo_data_t *sinfo_ptr, - node_info_t *node_ptr); + node_info_t *node_ptr, uint32_t node_scaling); static bool _match_part_data(sinfo_data_t *sinfo_ptr, partition_info_t* part_ptr); static int _query_server(partition_info_msg_t ** part_pptr, @@ -334,10 +330,6 @@ static int _build_sinfo_data(List sinfo_list, g_node_scaling = node_msg->node_scaling; -#ifdef HAVE_BG 
- cpus_per_node = node_msg->node_array[0].cpus / g_node_scaling; -#endif - /* by default every partition is shown, even if no nodes */ if ((!params.node_flag) && params.match_flags.partition_flag) { part_ptr = partition_msg->partition_array; @@ -517,7 +509,7 @@ static void _sort_hostlist(List sinfo_list) } static bool _match_node_data(sinfo_data_t *sinfo_ptr, - node_info_t *node_ptr) + node_info_t *node_ptr, uint32_t node_scaling) { if (sinfo_ptr->nodes && params.match_flags.features_flag && @@ -543,7 +535,7 @@ static bool _match_node_data(sinfo_data_t *sinfo_ptr, return true; if (params.match_flags.cpus_flag && - (node_ptr->cpus != sinfo_ptr->min_cpus)) + ((node_ptr->cpus / node_scaling) != sinfo_ptr->min_cpus)) return false; if (params.match_flags.sockets_flag && (node_ptr->sockets != sinfo_ptr->min_sockets)) @@ -630,6 +622,9 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, uint16_t base_state; uint16_t used_cpus = 0, error_cpus = 0; int total_cpus = 0, total_nodes = 0; + /* since node_scaling could be less here we need to use the + global node scaling which should never change. 
*/ + int single_node_cpus = (node_ptr->cpus / g_node_scaling); base_state = node_ptr->node_state & NODE_STATE_BASE; @@ -637,8 +632,8 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, sinfo_ptr->node_state = node_ptr->node_state; sinfo_ptr->features = node_ptr->features; sinfo_ptr->reason = node_ptr->reason; - sinfo_ptr->min_cpus = node_ptr->cpus; - sinfo_ptr->max_cpus = node_ptr->cpus; + sinfo_ptr->min_cpus = single_node_cpus; + sinfo_ptr->max_cpus = single_node_cpus; sinfo_ptr->min_sockets = node_ptr->sockets; sinfo_ptr->max_sockets = node_ptr->sockets; sinfo_ptr->min_cores = node_ptr->cores; @@ -656,10 +651,10 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, * just return, don't duplicate */ return; } else { - if (sinfo_ptr->min_cpus > node_ptr->cpus) - sinfo_ptr->min_cpus = node_ptr->cpus; - if (sinfo_ptr->max_cpus < node_ptr->cpus) - sinfo_ptr->max_cpus = node_ptr->cpus; + if (sinfo_ptr->min_cpus > single_node_cpus) + sinfo_ptr->min_cpus = single_node_cpus; + if (sinfo_ptr->max_cpus < single_node_cpus) + sinfo_ptr->max_cpus = single_node_cpus; if (sinfo_ptr->min_sockets > node_ptr->sockets) sinfo_ptr->min_sockets = node_ptr->sockets; @@ -711,31 +706,33 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, node_ptr->reason = xstrdup("Block(s) in error state"); sinfo_ptr->reason = node_ptr->reason; } - if(params.match_flags.cpus_flag && (used_cpus || error_cpus)) { - /* we only get one shot at this (because the node name - is the same), so we need to make - sure we get all the subgrps accounted for here */ + if(!params.match_flags.state_flag && (used_cpus || error_cpus)) { + /* We only get one shot at this (because all states + are combined together), so we need to make + sure we get all the subgrps accounted. 
(So use + g_node_scaling for safe measure) */ total_nodes = g_node_scaling; sinfo_ptr->nodes_alloc += used_cpus; sinfo_ptr->nodes_other += error_cpus; sinfo_ptr->nodes_idle += (total_nodes - (used_cpus + error_cpus)); - used_cpus *= cpus_per_node; - error_cpus *= cpus_per_node; + used_cpus *= single_node_cpus; + error_cpus *= single_node_cpus; } else { /* process only for this subgrp and then return */ - total_cpus = total_nodes * cpus_per_node; + total_cpus = total_nodes * single_node_cpus; if ((base_state == NODE_STATE_ALLOCATED) || (node_ptr->node_state & NODE_STATE_COMPLETING)) { sinfo_ptr->nodes_alloc += total_nodes; sinfo_ptr->cpus_alloc += total_cpus; - } else if (base_state == NODE_STATE_IDLE) { - sinfo_ptr->nodes_idle += total_nodes; - sinfo_ptr->cpus_idle += total_cpus; - } else { + } else if (IS_NODE_DRAIN(node_ptr) || + (base_state == NODE_STATE_DOWN)) { sinfo_ptr->nodes_other += total_nodes; + sinfo_ptr->cpus_other += total_cpus; + } else { + sinfo_ptr->nodes_idle += total_nodes; sinfo_ptr->cpus_idle += total_cpus; } @@ -748,10 +745,11 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, if ((base_state == NODE_STATE_ALLOCATED) || IS_NODE_COMPLETING(node_ptr)) sinfo_ptr->nodes_alloc += total_nodes; - else if (base_state == NODE_STATE_IDLE) + else if (IS_NODE_DRAIN(node_ptr) || + (base_state == NODE_STATE_DOWN)) + sinfo_ptr->nodes_other += total_nodes; + else sinfo_ptr->nodes_idle += total_nodes; - else - sinfo_ptr->nodes_other += total_nodes; #endif sinfo_ptr->nodes_total += total_nodes; @@ -769,9 +767,10 @@ static void _update_sinfo(sinfo_data_t *sinfo_ptr, node_info_t *node_ptr, } else sinfo_ptr->cpus_idle += total_cpus; -/* info("count is now %d %d %d %d", */ -/* sinfo_ptr->cpus_alloc, sinfo_ptr->cpus_idle, */ -/* sinfo_ptr->cpus_other, sinfo_ptr->cpus_total); */ + /* info("count is now %d %d %d %d %d", */ + /* sinfo_ptr->cpus_alloc, sinfo_ptr->cpus_idle, */ + /* sinfo_ptr->cpus_other, sinfo_ptr->cpus_total, */ + /* 
sinfo_ptr->nodes_total); */ } static int _insert_node_ptr(List sinfo_list, uint16_t part_num, @@ -787,7 +786,7 @@ static int _insert_node_ptr(List sinfo_list, uint16_t part_num, if (!_match_part_data(sinfo_ptr, part_ptr)) continue; if (sinfo_ptr->nodes_total && - (!_match_node_data(sinfo_ptr, node_ptr))) + (!_match_node_data(sinfo_ptr, node_ptr, node_scaling))) continue; _update_sinfo(sinfo_ptr, node_ptr, node_scaling); break; diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 678f71b78e6f466a6be16bf0efd21432b8b1a860..a59048363a4a2eecd14d297feeb597745a04d880 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -1395,6 +1395,8 @@ static void _purge_agent_args(agent_arg_t *agent_arg_ptr) slurm_free_srun_exec_msg(agent_arg_ptr->msg_args); else if (agent_arg_ptr->msg_type == SRUN_NODE_FAIL) slurm_free_srun_node_fail_msg(agent_arg_ptr->msg_args); + else if (agent_arg_ptr->msg_type == SRUN_STEP_MISSING) + slurm_free_srun_step_missing_msg(agent_arg_ptr->msg_args); else xfree(agent_arg_ptr->msg_args); } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a91b69c7880338c6fab31773a6d86ba3fcebcfaf..e3b15059c9d99b2ccc8eec6f10b9e7194ebefbc0 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2894,7 +2894,7 @@ static int _validate_job_create_req(job_desc_msg_t * job_desc) if (job_desc->mloaderimage && (strlen(job_desc->mloaderimage) > MAX_STR_LEN)) { info("_validate_job_create_req: strlen(mloaderimage) too big (%d)", - strlen(job_desc->features)); + strlen(job_desc->mloaderimage)); return ESLURM_PATHNAME_TOO_LONG; } if (job_desc->name && (strlen(job_desc->name) > MAX_STR_LEN)) { @@ -5029,13 +5029,33 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) else { #ifdef HAVE_BG uint32_t node_cnt = job_specs->num_procs; + /* Set the block type to NAV here since if the + job size changes from a small block to a + regular size block the block types won't + jive. 
+ */ + uint16_t conn_type = (uint16_t)SELECT_NAV; + /* Also, if geometry is set we need to 0 out + the first part of it so the bluegene plugin + doesn't look at it any more. + */ + uint16_t req_geometry[SYSTEM_DIMENSIONS] = { 0 }; + if(cpus_per_node) node_cnt /= cpus_per_node; select_g_select_jobinfo_set(job_ptr->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &node_cnt); + select_g_select_jobinfo_set(job_ptr->select_jobinfo, + SELECT_JOBDATA_CONN_TYPE, + &conn_type); + select_g_select_jobinfo_set(job_ptr->select_jobinfo, + SELECT_JOBDATA_GEOMETRY, + &req_geometry); #endif job_ptr->num_procs = job_specs->num_procs; + job_ptr->details->job_min_cpus = + job_specs->job_min_cpus; info("update_job: setting num_procs to %u for " "job_id %u", job_specs->num_procs, job_specs->job_id); diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 6f6816254f9b23a9599ea7b1432e3d048838b028..46f05a2112e22dfd89d410b632f7a5abdfeb9828 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -2825,7 +2825,7 @@ extern int job_resv_check(struct job_record *job_ptr) else job_ptr->resv_ptr->job_pend_cnt++; - if (job_ptr->resv_ptr->end_time < (time(NULL) + resv_over_run)) + if ((job_ptr->resv_ptr->end_time + resv_over_run) < time(NULL)) return ESLURM_INVALID_TIME_VALUE; return SLURM_SUCCESS; } diff --git a/src/slurmd/slurmd/req.c b/src/slurmd/slurmd/req.c index 879b93ddbfa8931fe03827da5a11764d7537d0da..1cc41c4a4c5e83dc572630d188da8d6496a19711 100644 --- a/src/slurmd/slurmd/req.c +++ b/src/slurmd/slurmd/req.c @@ -1234,7 +1234,7 @@ _abort_step(uint32_t job_id, uint32_t step_id) step_complete_msg_t resp; slurm_msg_t resp_msg; slurm_msg_t_init(&resp_msg); - int rc; /* Note: we are ignoring return code */ + int rc, rc2; resp.job_id = job_id; resp.job_step_id = step_id; @@ -1244,7 +1244,10 @@ _abort_step(uint32_t job_id, uint32_t step_id) resp.jobacct = jobacct_gather_g_create(NULL); resp_msg.msg_type = REQUEST_STEP_COMPLETE; resp_msg.data = &resp; - return 
slurm_send_recv_controller_rc_msg(&resp_msg, &rc); + rc2 = slurm_send_recv_controller_rc_msg(&resp_msg, &rc); + /* Note: we are ignoring the RPC return code */ + jobacct_gather_g_destroy(resp.jobacct); + return rc2; } static void diff --git a/src/slurmd/slurmd/slurmd.c b/src/slurmd/slurmd/slurmd.c index 472f31aed42d01804d67f802ba9c79e08abdf8cd..51d7b98f79a663f09e14b36ec45392cdbeb8d4c1 100644 --- a/src/slurmd/slurmd/slurmd.c +++ b/src/slurmd/slurmd/slurmd.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmd/slurmd.c - main slurm node server daemon - * $Id: slurmd.c 19759 2010-03-16 20:32:26Z jette $ + * $Id: slurmd.c 20026 2010-04-14 20:31:58Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. @@ -724,9 +724,11 @@ _read_config(void) _massage_pathname(&conf->logfile); /* set node_addr if relevant */ - if((conf->node_addr = slurm_conf_get_nodeaddr(conf->hostname))) - if (strcmp(conf->node_addr, conf->hostname) == 0) - xfree(conf->node_addr); /* Sets to NULL */ + if ((conf->node_addr == NULL) && + (conf->node_addr = slurm_conf_get_nodeaddr(conf->hostname)) && + (strcmp(conf->node_addr, conf->hostname) == 0)) { + xfree(conf->node_addr); /* Sets to NULL */ + } conf->port = slurm_conf_get_port(conf->node_name); slurm_conf_get_cpus_sct(conf->node_name, diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index fc9f0e7d2aa91756ac2f82582d792d089d420fed..a32336089d32546ca402b235d546215c9e1100d1 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 19858 2010-03-23 21:29:05Z jette $ + * $Id: mgr.c 20036 2010-04-15 18:52:13Z da $ 
***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. @@ -1649,7 +1649,7 @@ _send_launch_resp(slurmd_job_t *job, int rc) } if (slurm_send_only_node_msg(&resp_msg) != SLURM_SUCCESS) - error("failed to send RESPONSE_LAUNCH_TASKS: %m"); + error("failed to send RESPONSE_LAUNCH_TASKS: %m"); xfree(resp.local_pids); xfree(resp.task_ids); diff --git a/src/slurmd/slurmstepd/slurmstepd_job.h b/src/slurmd/slurmstepd/slurmstepd_job.h index 3a9735f0ee50d71ba195c006960ef3ba3e424c05..f7c66c3e88d95e68fb520435da49c32c96ab966e 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.h +++ b/src/slurmd/slurmstepd/slurmstepd_job.h @@ -1,38 +1,38 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/slurmstepd_job.h slurmd_job_t definition - * $Id: slurmstepd_job.h 19152 2009-12-10 22:29:52Z da $ + * $Id: slurmstepd_job.h 19917 2010-03-30 16:41:15Z da $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Copyright (C) 2008-2009 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <mgrondona@llnl.gov>. * CODE-OCEC-09-009. All rights reserved. - * + * * This file is part of SLURM, a resource management program. * For details, see <https://computing.llnl.gov/linux/slurm/>. * Please also read the included file: DISCLAIMER. - * + * * SLURM is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. 
* - * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under - * certain conditions as described in each individual source file, and - * distribute linked combinations including the two. You must obey the GNU - * General Public License in all respects for all of the code used other than - * OpenSSL. If you modify file(s) with this exception, you may extend this - * exception to your version of the file(s), but you are not obligated to do + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do * so. If you do not wish to do so, delete this exception statement from your - * version. If you delete this exception statement from all source files in + * version. If you delete this exception statement from all source files in * the program, then also delete it here. - * + * * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. - * + * * You should have received a copy of the GNU General Public License along * with SLURM; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
@@ -84,7 +84,7 @@ typedef enum task_state { typedef struct task_info { pthread_mutex_t mutex; /* mutex to protect task state */ slurmd_task_state_t state; /* task state */ - + int id; /* local task id */ uint32_t gtid; /* global task id */ pid_t pid; /* task pid */ @@ -185,7 +185,7 @@ typedef struct slurmd_job { pid_t jmgr_pid; /* job manager pid */ pid_t pgid; /* process group id for tasks */ - uint16_t task_flags; + uint16_t task_flags; uint16_t multi_prog; uint16_t overcommit; env_t *envtp; @@ -212,8 +212,8 @@ void job_kill(slurmd_job_t *job, int signal); void job_destroy(slurmd_job_t *job); -struct srun_info * srun_info_create(slurm_cred_t *cred, slurm_addr *respaddr, - slurm_addr *ioaddr); +struct srun_info * srun_info_create(slurm_cred_t *cred, slurm_addr *respaddr, + slurm_addr *ioaddr); void srun_info_destroy(struct srun_info *srun); diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c index 1436f72e15a0a4ec93c2bab55c1f75655dd6a984..daa5e5c4257db566e304c24b63b81084a2ff2d60 100644 --- a/src/slurmd/slurmstepd/task.c +++ b/src/slurmd/slurmstepd/task.c @@ -100,68 +100,81 @@ * Static prototype definitions. 
*/ static void _make_tmpdir(slurmd_job_t *job); -static void _print_stdout(char *buf); static int _run_script_and_set_env(const char *name, const char *path, slurmd_job_t *job); -static void _update_env(char *buf, char ***env); +static void _proc_stdout(char *buf, char ***env); static char *_uint32_array_to_str(int array_len, const uint32_t *array); -/* Search for "export NAME=value" records in buf and - * use them to add environment variables to env */ -static void _update_env(char *buf, char ***env) +/* + * Process TaskProlog output + * "export NAME=value" adds environment variables + * "unset NAME" clears an environment variable + * "print <whatever>" writes that to the job's stdout + */ +static void _proc_stdout(char *buf, char ***env) { - char *tmp_ptr, *name_ptr, *val_ptr, *buf_ptr = buf; - - while ((tmp_ptr = strstr(buf_ptr, "export"))) { - buf_ptr += 6; - while (isspace(buf_ptr[0])) - buf_ptr++; - if (buf_ptr[0] == '=') /* mal-formed */ - continue; - name_ptr = buf_ptr; /* start of env var name */ - while ((buf_ptr[0] != '=') && (buf_ptr[0] != '\0')) - buf_ptr++; - if (buf_ptr[0] == '\0') /* mal-formed */ - continue; - buf_ptr[0] = '\0'; /* end of env var name */ - buf_ptr++; - val_ptr = buf_ptr; /* start of env var value */ - while ((!isspace(buf_ptr[0])) && (buf_ptr[0] != '\0')) - buf_ptr++; - if (isspace(buf_ptr[0])) { - buf_ptr[0] = '\0';/* end of env var value */ - buf_ptr++; + bool end_buf = false; + int len; + char *buf_ptr, *name_ptr, *val_ptr; + char *end_line, *equal_ptr; + + buf_ptr = buf; + while (buf_ptr[0]) { + end_line = strchr(buf_ptr, '\n'); + if (!end_line) { + end_line = buf_ptr + strlen(buf_ptr); + end_buf = true; } - debug("name:%s:val:%s:",name_ptr,val_ptr); - if (setenvf(env, name_ptr, "%s", val_ptr)) - error("Unable to set %s environment variable", - name_ptr); - } -} - -/* Search for "print <whatever>" records in buf and - * write that to the job's stdout */ -static void _print_stdout(char *buf) -{ - char *tmp_ptr, *buf_ptr = buf; - 
- while ((tmp_ptr = strstr(buf_ptr, "print "))) { - if ((tmp_ptr != buf_ptr) && (tmp_ptr[-1] != '\n')) { - /* Skip "print " if not at start of a line */ - buf_ptr +=6; - continue; + if (!strncmp(buf_ptr, "print ", 6)) { + buf_ptr += 6; + while (isspace(buf_ptr[0])) + buf_ptr++; + len = end_line - buf_ptr + 1; + safe_write(1, buf_ptr, len); + } else if (!strncmp(buf_ptr, "export ",7)) { + name_ptr = buf_ptr + 7; + while (isspace(name_ptr[0])) + name_ptr++; + equal_ptr = strchr(name_ptr, '='); + if (!equal_ptr || (equal_ptr > end_line)) + goto rwfail; + val_ptr = equal_ptr + 1; + while (isspace(equal_ptr[-1])) + equal_ptr--; + equal_ptr[0] = '\0'; + end_line[0] = '\0'; + debug("export name:%s:val:%s:", name_ptr, val_ptr); + if (setenvf(env, name_ptr, "%s", val_ptr)) { + error("Unable to set %s environment variable", + buf_ptr); + } + equal_ptr[0] = '='; + if (end_buf) + end_line[0] = '\0'; + else + end_line[0] = '\n'; + } else if (!strncmp(buf_ptr, "unset ", 6)) { + name_ptr = buf_ptr + 6; + while (isspace(name_ptr[0])) + name_ptr++; + if ((name_ptr[0] == '\n') || (name_ptr[0] == '\0')) + goto rwfail; + while (isspace(end_line[-1])) + end_line--; + end_line[0] = '\0'; + debug(" unset name:%s:", name_ptr); + unsetenvp(*env, name_ptr); + if (end_buf) + end_line[0] = '\0'; + else + end_line[0] = '\n'; } - buf_ptr = tmp_ptr + 6; - tmp_ptr = strchr(buf_ptr, '\n'); - if (tmp_ptr) { - safe_write(1, buf_ptr, (tmp_ptr - buf_ptr + 1)); - buf_ptr = tmp_ptr + 1; - } else { - safe_write(1, buf_ptr, strlen(buf_ptr)); + +rwfail: /* process rest of script output */ + if (end_buf) break; - } - } -rwfail: + buf_ptr = end_line + 1; + } return; } @@ -180,7 +193,7 @@ _run_script_and_set_env(const char *name, const char *path, slurmd_job_t *job) { int status, rc, nread; pid_t cpid; - int pfd[2]; + int pfd[2], offset = 0; char buf[4096]; xassert(job->env); @@ -211,6 +224,8 @@ _run_script_and_set_env(const char *name, const char *path, slurmd_job_t *job) error("couldn't do the dup: %m"); 
close(2); close(0); + close(pfd[0]); + close(pfd[1]); #ifdef SETPGRP_TWO_ARGS setpgrp(0, 0); #else @@ -222,12 +237,11 @@ _run_script_and_set_env(const char *name, const char *path, slurmd_job_t *job) } close(pfd[1]); - while ((nread = read(pfd[0], buf, sizeof(buf))) > 0) { - buf[nread] = 0; - //debug("read %d:%s:", nread, buf); - _update_env(buf, &job->env); - _print_stdout(buf); - } + buf[0] = '\0'; + while ((nread = read(pfd[0], buf+offset, (sizeof(buf)-offset))) > 0) + offset += nread; + /* debug ("read %d:%s:", offset, buf); */ + _proc_stdout(buf, &job->env); close(pfd[0]); while (1) { diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c index 6b6b34a08982f2d07b34c39ce578004a4df8dfa2..0f1ffeec0726e6d439e18c05ec858fbcb6c058f6 100644 --- a/src/smap/configure_functions.c +++ b/src/smap/configure_functions.c @@ -77,7 +77,8 @@ static void _delete_allocated_blocks(List allocated_blocks) allocated_block_t *allocated_block = NULL; while ((allocated_block = list_pop(allocated_blocks)) != NULL) { - remove_block(allocated_block->nodes,0); + remove_block(allocated_block->nodes, 0, + allocated_block->request->conn_type); list_destroy(allocated_block->nodes); delete_ba_request(allocated_block->request); xfree(allocated_block); @@ -835,7 +836,9 @@ static int _remove_allocation(char *com, List allocated_blocks) } else if(allocated_block->letter == letter) { found=1; remove_block(allocated_block->nodes, - color_count); + color_count, + allocated_block->request-> + conn_type); list_destroy(allocated_block->nodes); delete_ba_request(allocated_block->request); list_remove(results_i); diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 5637ca969b04a5d7b0cbeb16228c232ae8504e56..31f76c3bbef853c132ee889b340c2bedcffcb5ed 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -387,7 +387,6 @@ allocate_nodes(void) xsignal(SIGUSR2, _signal_while_allocating); while (!resp) { -//WHAT IS THIS? 
resp = slurm_allocate_resources_blocking(j, opt.immediate, _set_pending_job_id); if (destroy_job) { @@ -520,7 +519,7 @@ slurmctld_msg_init(void) * (see opt.h) */ job_desc_msg_t * -job_desc_msg_create_from_opts () +job_desc_msg_create_from_opts (void) { job_desc_msg_t *j = xmalloc(sizeof(*j)); char buf[8192]; @@ -536,6 +535,8 @@ job_desc_msg_create_from_opts () j->name = xstrdup(opt.job_name); else j->name = xstrdup(opt.cmd_name); + if (opt.acctg_freq >= 0) + j->acctg_freq = opt.acctg_freq; j->reservation = xstrdup(opt.reservation); j->wckey = xstrdup(opt.wckey); diff --git a/src/srun/srun.c b/src/srun/srun.c index c2f826dd3dc925bc2c8b151c87faf52d6c2fc8d2..abfc55af8def5f289814e985706dbdc44b09591c 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -156,6 +156,7 @@ static int _slurm_debug_env_val (void); static void _task_start(launch_tasks_response_msg_t *msg); static void _task_finish(task_exit_msg_t *msg); static char *_uint16_array_to_str(int count, const uint16_t *array); +static int _validate_relative(resource_allocation_response_msg_t *resp); /* * from libvirt-0.6.2 GPL2 @@ -296,6 +297,8 @@ int srun(int ac, char **av) if (opt.exclusive) _step_opt_exclusive(); _set_cpu_env_var(resp); + if (_validate_relative(resp)) + exit(error_exit); job = job_step_create_allocation(resp); slurm_free_resource_allocation_response_msg(resp); @@ -315,12 +318,22 @@ int srun(int ac, char **av) exit(error_exit); } #endif + if (opt.relative_set && opt.relative) { + error("--relative option ignored in job allocation " + "request"); + opt.relative = NO_VAL; + opt.relative_set = false; + } if ( !(resp = allocate_nodes()) ) exit(error_exit); got_alloc = 1; _print_job_information(resp); _set_cpu_env_var(resp); + if (_validate_relative(resp)) { + slurm_complete_job(resp->job_id, 1); + exit(error_exit); + } job = job_create_allocation(resp); opt.exclusive = false; /* not applicable for this step */ @@ -735,6 +748,25 @@ static void _set_cpu_env_var(resource_allocation_response_msg_t *resp) 
return; } +static int _validate_relative(resource_allocation_response_msg_t *resp) +{ + + if (opt.relative_set && + ((opt.relative + opt.min_nodes) > resp->node_cnt)) { + if (opt.nodes_set_opt) { /* -N command line option used */ + error("--relative and --nodes option incompatable " + "with count of allocated nodes (%d+%d>%d)", + opt.relative, opt.min_nodes, resp->node_cnt); + } else { /* SLURM_NNODES option used */ + error("--relative and SLURM_NNODES option incompatable " + "with count of allocated nodes (%d+%d>%d)", + opt.relative, opt.min_nodes, resp->node_cnt); + } + return -1; + } + return 0; +} + /* Set SLURM_RLIMIT_* environment variables with current resource * limit values, reset RLIMIT_NOFILE to maximum possible value */ static int _set_rlimit_env(void) @@ -1035,8 +1067,13 @@ _task_start(launch_tasks_response_msg_t *msg) int taskid; int i; - verbose("Node %s (%d), %d tasks started", - msg->node_name, msg->srun_node_id, msg->count_of_pids); + if(msg->count_of_pids) + verbose("Node %s, %d tasks started", + msg->node_name, msg->count_of_pids); + else + error("No tasks started on node %s: %s", + msg->node_name, slurm_strerror(msg->return_code)); + for (i = 0; i < msg->count_of_pids; i++) { taskid = msg->task_ids[i]; diff --git a/testsuite/expect/test21.27 b/testsuite/expect/test21.27 index 0adc4cf537bfd3759546bcffbbd3716e4e63a20f..81fa81a04dd242e21f6a07218fb501fc0e2ef180 100755 --- a/testsuite/expect/test21.27 +++ b/testsuite/expect/test21.27 @@ -50,11 +50,10 @@ set ac3 pa3 set usr user set us1 usr1 set us2 usr2 -set us3 gdm +set us3 mysql set access_err 0 set timeout 120 -#set user_name "id -u -n" print_header $test_id