From 08d1c29632a8ddcc2d3bcf106bc91abb07d9c9d5 Mon Sep 17 00:00:00 2001
From: Mehdi Dogguy <mehdi@debian.org>
Date: Mon, 8 Sep 2014 21:30:59 +0200
Subject: [PATCH] Imported Upstream version 1.2.23

---
 META                                          |   4 +-
 NEWS                                          |  27 +-
 doc/html/configurator.html.in                 |   1 +
 doc/man/man1/srun.1                           |   3 +-
 doc/man/man5/slurm.conf.5                     |  10 +-
 slurm.spec                                    |  10 +-
 slurm/slurm.h.in                              |   1 +
 src/api/Makefile.am                           |  21 +-
 src/api/Makefile.in                           |  26 +-
 src/common/env.c                              |  50 ++--
 src/common/env.h                              |   5 +-
 src/common/hostlist.c                         |  31 ++-
 src/common/node_select.c                      |  38 ++-
 src/common/read_config.c                      |  13 +-
 src/common/read_config.h                      |   1 +
 src/common/slurm_protocol_api.c               |  14 +
 src/common/slurm_protocol_api.h               |   5 +
 src/common/slurm_step_layout.c                |   4 +-
 src/plugins/mpi/mvapich/mvapich.c             | 258 +++++++++++++-----
 src/plugins/sched/wiki/get_jobs.c             |  10 +-
 src/plugins/sched/wiki/start_job.c            |   4 +-
 src/plugins/sched/wiki2/start_job.c           |   4 +-
 .../block_allocator/block_allocator.c         |   3 +-
 .../block_allocator/block_allocator.h         |   3 +-
 .../select/bluegene/plugin/bg_job_place.c     |  85 ++----
 .../select/bluegene/plugin/bg_job_run.c       |   5 +-
 src/plugins/select/bluegene/plugin/bluegene.c | 156 +++++++----
 .../select/bluegene/plugin/select_bluegene.c  |   4 +-
 src/salloc/opt.c                              |   4 +
 src/sbatch/opt.c                              |   4 +
 src/sbatch/sbatch.c                           |   9 +-
 src/slaunch/opt.c                             |   3 +
 src/slurmctld/controller.c                    |   6 +-
 src/slurmctld/job_mgr.c                       |   5 +-
 src/slurmctld/node_mgr.c                      |   5 +-
 src/slurmctld/node_scheduler.c                |  24 +-
 src/slurmctld/partition_mgr.c                 |  18 +-
 src/slurmctld/proc_req.c                      |   3 +-
 src/slurmctld/step_mgr.c                      |  12 +-
 src/slurmd/slurmstepd/mgr.c                   |   4 +-
 src/smap/configure_functions.c                |   9 +-
 src/smap/partition_functions.c                |   2 +
 src/srun/allocate.c                           |  10 +-
 src/srun/opt.c                                |   7 +-
 src/sview/part_info.c                         |   5 +-
 45 files changed, 627 insertions(+), 299 deletions(-)

diff --git a/META b/META
index 7061ad1f7..60cb66771 100644
--- a/META
+++ b/META
@@ -3,9 +3,9 @@
   Api_revision:  0
   Major:         1
   Meta:          1
-  Micro:         22
+  Micro:         23
   Minor:         2
   Name:          slurm
   Release:       1
   Release_tags:  
-  Version:       1.2.22
+  Version:       1.2.23
diff --git a/NEWS b/NEWS
index 212ceda0a..2cb6a555b 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,31 @@ documents those changes that are of interest to users and admins.
 
 * Changes in SLURM 1.2.23
 =========================
+ -- Fix for libpmi to not export unneeded variables like xstr*
+ -- BLUEGENE - added per partition dynamic block creation
+ -- fix infinite loop bug in sview when there were multiple partitions
+ -- Send message to srun command when a job is requeued due to node failure.
+    Note this will be overwritten in the output file unless JobFileAppend
+    is set in slurm.conf. In slurm version 1.3, srun's --open-mode=append
+    option will offer this control for each job.
+ -- Change a node's default TmpDisk from 1MB to 0MB and change job's default 
+    disk space requirement from 1MB to 0MB.
+ -- In sched/wiki (Maui scheduler) specify a QOS (quality of service) by 
+    specifying an account of the form "qos-name".
+ -- In select/linear, fix bug in scheduling required nodes that already have
+    a job running on them (req.load.patch from Chris Holmes, HP).
+ -- For use with Moab only: change timeout for srun/sbatch --get-user-env 
+    option to 2 secs, don't get DISPLAY environment variables, but explicitly 
+    set ENVIRONMENT=BATCH and HOSTNAME to the execution host of the batch script.
+ -- Add configuration parameter GetEnvTimeout for use with Moab. See
+    "man slurm.conf" for details.
+ -- Modify salloc and sbatch to accept both "--tasks" and "--ntasks" as 
+    equivalent options for compatibility with srun.
+ -- If a partition's node list contains space separators, replace them with 
+    commas for easier parsing.
+ -- BLUEGENE - fixed bug in geometry specs when creating a block.
+ -- Add support for Moab and Maui to start jobs with select/cons_res plugin
+    and jobs requiring more than one CPU per task.
 
 * Changes in SLURM 1.2.22
 =========================
@@ -2790,4 +2815,4 @@ documents those changes that are of interest to users and admins.
  -- Change directory to /tmp in slurmd if daemonizing.
  -- Logfiles are reopened on reconfigure.
  
-$Id: NEWS 13118 2008-01-29 19:09:00Z da $
+$Id: NEWS 13293 2008-02-15 21:51:16Z jette $
diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in
index 8b0faed7b..b7d09d968 100644
--- a/doc/html/configurator.html.in
+++ b/doc/html/configurator.html.in
@@ -159,6 +159,7 @@ function displayfile()
    "InactiveLimit=" + document.config.inactive_limit.value + "<br>" +
    "MinJobAge=" + document.config.min_job_age.value + "<br>" +
    "KillWait=" + document.config.kill_wait.value + "<br>" +
+   "#GetEnvTimeout=2 <br>" +
    "#UnkillableStepProgram= <br>" +
    "#UnkillableStepTimeout=60 <br>" +
    "Waittime=" + document.config.wait_time.value + "<br>" +
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index 15bf02013..e58121e2c 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1,4 +1,4 @@
-\." $Id: srun.1 13066 2008-01-23 16:34:55Z jette $
+\." $Id: srun.1 13256 2008-02-12 22:02:36Z jette $
 .\"
 .TH SRUN "1" "July 2007" "srun 1.2" "slurm components"
 
@@ -335,6 +335,7 @@ use only one core in each physical CPU
 .B [no]multithread
 [don't] use extra threads with in-core multi-threading
 which can benefit communication intensive applications
+.TP
 .B help
 show this help message
 .RE
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index b330630f1..43a950872 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -145,6 +145,14 @@ specific requested value. Job id values generated will incremented by 1
 for each subsequent job. This may be used to provide a meta\-scheduler 
 with a job id space which is disjoint from the interactive jobs. 
 The default value is 1.
+
+.TP
+\fBGetEnvTimeout\fR
+Used for Moab scheduled jobs only. Controls how long, in seconds, a job
+should wait for the user's environment to load before attempting to
+load it from a cache file. Applies when the srun or sbatch
+\fI--get-user-env\fR option is used. The default value is 2 seconds.
+
 .TP
 \fBHeartbeatInterval\fR
 Defunct paramter. 
@@ -950,7 +958,7 @@ system is purged as needed so that user jobs have access to
 most of this space. 
 The Prolog and/or Epilog programs (specified in the configuration file) 
 might be used to insure the file system is kept clean. 
-The default value is 1.
+The default value is 0.
 .TP
 \fBWeight\fR
 The priority of the node for scheduling purposes. 
diff --git a/slurm.spec b/slurm.spec
index de1d4ded7..5f7e05c94 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -1,4 +1,4 @@
-# $Id: slurm.spec 13075 2008-01-23 20:39:30Z da $
+# $Id: slurm.spec 13266 2008-02-13 21:54:50Z da $
 #
 # Note that this package is not relocatable
 
@@ -60,14 +60,14 @@
 %endif
 
 Name:    slurm
-Version: 1.2.22
+Version: 1.2.23
 Release: 1
 
 Summary: Simple Linux Utility for Resource Management
 
 License: GPL 
 Group: System Environment/Base
-Source: slurm-1.2.22.tar.bz2
+Source: slurm-1.2.23.tar.bz2
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}
 URL: https://computing.llnl.gov/linux/slurm/
 BuildRequires: openssl-devel >= 0.9.6 openssl >= 0.9.6
@@ -211,7 +211,7 @@ SLURM process tracking plugin for SGI job containers.
 #############################################################################
 
 %prep
-%setup -n slurm-1.2.22
+%setup -n slurm-1.2.23
 
 %build
 %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \
@@ -219,7 +219,7 @@ SLURM process tracking plugin for SGI job containers.
     %{?with_proctrack}	\
     %{?with_ssl}		\
     %{?with_munge}      \
-	%{!?with_readline:--without-readline} \
+	%{!?slurm_with_readline:--without-readline} \
     %{?with_cflags}
 
 make %{?_smp_mflags} 
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index fa7596d07..7f76820d6 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -868,6 +868,7 @@ typedef struct slurm_ctl_conf {
 	char *job_comp_type;	/* job completion logger type */
 	char *job_comp_loc;	/* job completion logging location */
 	uint16_t job_file_append; /* if set, append to stdout/err file */
+	uint16_t get_env_timeout; /* secs allowed for srun --get-user-env */
 	uint16_t kill_wait;	/* seconds between SIGXCPU to SIGKILL 
 				 * on job termination */
 	char *mail_prog;	/* pathname of mail program */
diff --git a/src/api/Makefile.am b/src/api/Makefile.am
index 6aeddd869..819f5bd2c 100644
--- a/src/api/Makefile.am
+++ b/src/api/Makefile.am
@@ -10,6 +10,10 @@ VERSION_SCRIPT = \
 	version.map
 OTHER_FLAGS = \
 	-Wl,--version-script=$(VERSION_SCRIPT)
+PMI_VERSION_SCRIPT = \
+	pmi_version.map
+PMI_OTHER_FLAGS = \
+	-Wl,--version-script=$(PMI_VERSION_SCRIPT)
 endif
 
 # libslurm version information : 
@@ -46,7 +50,7 @@ lib_LTLIBRARIES = libslurm.la libpmi.la
 # need to be built before anything else.  For instance, this takes care
 # of libpmi.la's dependency on libslurm.la, as long as you use "make"
 # and NOT "make libpmi.la".
-BUILT_SOURCES = $(VERSION_SCRIPT) libslurm.la
+BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la
 
 # Note that libslurmhelper is mostly the same as libslurm, except that
 # it exports ALL symbols, including those from libcommon, libeio, etc.
@@ -101,8 +105,9 @@ libslurm_la_LDFLAGS        = \
         $(OTHER_FLAGS)
 
 libpmi_la_SOURCES = pmi.c
-libpmi_la_LIBADD = $(convenience_libs)
-libpmi_la_LDFLAGS = $(LIB_LDFLAGS)
+#libpmi_la_LIBADD = $(convenience_libs)
+libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \
+        $(PMI_OTHER_FLAGS)
 
 force:
 $(convenience_libs) : force
@@ -119,8 +124,14 @@ $(VERSION_SCRIPT) :
 	 echo "  local: *;"; \
 	 echo "};") > $(VERSION_SCRIPT)
 
+$(PMI_VERSION_SCRIPT) :
+	(echo "{ global:"; \
+	 echo "   PMI_*;"; \
+	 echo "  local: *;"; \
+	 echo "};") > $(PMI_VERSION_SCRIPT)
+
 CLEANFILES = \
-	$(VERSION_SCRIPT)
+	$(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT)
 
 DISTCLEANFILES = \
-	$(VERSION_SCRIPT)
+	$(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT)
diff --git a/src/api/Makefile.in b/src/api/Makefile.in
index 4e9f9cfe6..2d94ff509 100644
--- a/src/api/Makefile.in
+++ b/src/api/Makefile.in
@@ -74,7 +74,7 @@ am__strip_dir = `echo $$p | sed -e 's|^.*/||'`;
 am__installdirs = "$(DESTDIR)$(libdir)"
 libLTLIBRARIES_INSTALL = $(INSTALL)
 LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES)
-libpmi_la_DEPENDENCIES = $(convenience_libs)
+libpmi_la_LIBADD =
 am_libpmi_la_OBJECTS = pmi.lo
 libpmi_la_OBJECTS = $(am_libpmi_la_OBJECTS)
 libpmi_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
@@ -280,6 +280,12 @@ INCLUDES = -I$(top_srcdir)
 @WITH_GNU_LD_TRUE@OTHER_FLAGS = \
 @WITH_GNU_LD_TRUE@	-Wl,--version-script=$(VERSION_SCRIPT)
 
+@WITH_GNU_LD_TRUE@PMI_VERSION_SCRIPT = \
+@WITH_GNU_LD_TRUE@	pmi_version.map
+
+@WITH_GNU_LD_TRUE@PMI_OTHER_FLAGS = \
+@WITH_GNU_LD_TRUE@	-Wl,--version-script=$(PMI_VERSION_SCRIPT)
+
 
 # libslurm version information : 
 #
@@ -314,7 +320,7 @@ lib_LTLIBRARIES = libslurm.la libpmi.la
 # need to be built before anything else.  For instance, this takes care
 # of libpmi.la's dependency on libslurm.la, as long as you use "make"
 # and NOT "make libpmi.la".
-BUILT_SOURCES = $(VERSION_SCRIPT) libslurm.la
+BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la
 
 # Note that libslurmhelper is mostly the same as libslurm, except that
 # it exports ALL symbols, including those from libcommon, libeio, etc.
@@ -366,13 +372,15 @@ libslurm_la_LDFLAGS = \
         $(OTHER_FLAGS)
 
 libpmi_la_SOURCES = pmi.c
-libpmi_la_LIBADD = $(convenience_libs)
-libpmi_la_LDFLAGS = $(LIB_LDFLAGS)
+#libpmi_la_LIBADD = $(convenience_libs)
+libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \
+        $(PMI_OTHER_FLAGS)
+
 CLEANFILES = \
-	$(VERSION_SCRIPT)
+	$(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT)
 
 DISTCLEANFILES = \
-	$(VERSION_SCRIPT)
+	$(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT)
 
 all: $(BUILT_SOURCES)
 	$(MAKE) $(AM_MAKEFLAGS) all-am
@@ -707,6 +715,12 @@ $(VERSION_SCRIPT) :
 	 echo "   client_io_handler_*;"; \
 	 echo "  local: *;"; \
 	 echo "};") > $(VERSION_SCRIPT)
+
+$(PMI_VERSION_SCRIPT) :
+	(echo "{ global:"; \
+	 echo "   PMI_*;"; \
+	 echo "  local: *;"; \
+	 echo "};") > $(PMI_VERSION_SCRIPT)
 # Tell versions [3.59,3.63) of GNU make to not export all variables.
 # Otherwise a system limit (for SysV at least) may be exceeded.
 .NOEXPORT:
diff --git a/src/common/env.c b/src/common/env.c
index 741152a0c..424faa277 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  src/common/env.c - add an environment variable to environment vector
- *  $Id: env.c 12970 2008-01-07 20:16:53Z jette $
+ *  $Id: env.c 13237 2008-02-08 23:16:16Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -53,6 +53,7 @@
 #include "slurm/slurm.h"
 #include "src/common/log.h"
 #include "src/common/env.h"
+#include "src/common/read_config.h"
 #include "src/common/xassert.h"
 #include "src/common/xmalloc.h"
 #include "src/common/xstring.h"
@@ -75,8 +76,6 @@ strong_alias(env_array_append_fmt,	slurm_env_array_append_fmt);
 strong_alias(env_array_overwrite,	slurm_env_array_overwrite);
 strong_alias(env_array_overwrite_fmt,	slurm_env_array_overwrite_fmt);
 
-#define SU_WAIT_MSEC 8000	/* 8000 msec for /bin/su to return user 
-				 * env vars for --get-user-env option */
 #define ENV_BUFSIZE (64 * 1024)
 
 /*
@@ -132,6 +131,18 @@ _extend_env(char ***envp)
 	return (++ep);
 }
 
+/* _discard_env - report whether a variable gathered for srun's
+ *	--get-user-env option must be dropped instead of being set */
+static bool _discard_env(char *name, char *value)
+{
+	/* only the name decides; value is not examined. Exactly these
+	 * three variables are filtered out. */
+	if (!strcmp(name, "DISPLAY") || !strcmp(name, "ENVIRONMENT") ||
+	    !strcmp(name, "HOSTNAME"))
+		return true;
+	return false;
+}
+
 /*
  * Return the number of elements in the environment `env'
  */
@@ -787,6 +798,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc)
  *	SLURM_JOB_NODELIST
  *	SLURM_JOB_CPUS_PER_NODE
  *	ENVIRONMENT=BATCH
+ *	HOSTNAME
  *	LOADLBATCH (AIX only)
  *
  * Sets OBSOLETE variables:
@@ -796,8 +808,9 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc)
  *	SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node
  *	? probably only needed for users...
  */
-void
-env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch)
+extern void
+env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch,
+			const char *node_name)
 {
 	char *tmp;
 	uint32_t num_nodes = 0;
@@ -817,6 +830,8 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch)
 					batch->cpu_count_reps);
 	env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp);
 	env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH");
+	if (node_name)
+		env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name);
 #ifdef HAVE_AIX
 	/* this puts the "poe" command into batch mode */
 	env_array_overwrite(dest, "LOADLBATCH", "yes");
@@ -1254,9 +1269,10 @@ char **_load_env_cache(const char *username)
 		if (!fgets(line, ENV_BUFSIZE, fp))
 			break;
 		_strip_cr_nl(line);
-		_env_array_entry_splitter(line, name, ENV_BUFSIZE, value, 
-					  ENV_BUFSIZE);
-		env_array_overwrite(&env, name, value);
+		if (_env_array_entry_splitter(line, name, ENV_BUFSIZE, value, 
+					      ENV_BUFSIZE) &&
+		    (!_discard_env(name, value)))
+			env_array_overwrite(&env, name, value);
 	}
 	fclose(fp);
 	return env;
@@ -1271,7 +1287,7 @@ char **_load_env_cache(const char *username)
  * 2. Load the user environment from a cache file. This is used
  *    in the event that option 1 times out.
  *
- * timeout value is in seconds or zero for default (8 secs) 
+ * timeout value is in seconds or zero for GetEnvTimeout (default 2 secs)
  * mode is 1 for short ("su <user>"), 2 for long ("su - <user>")
  * On error, returns NULL.
  *
@@ -1293,7 +1309,7 @@ char **env_array_user_default(const char *username, int timeout, int mode)
 	struct pollfd ufds;
 
 	if (geteuid() != (uid_t)0) {
-		info("WARNING: you must be root to use --get-user-env");
+		fatal("WARNING: you must be root to use --get-user-env");
 		return NULL;
 	}
 
@@ -1340,15 +1356,14 @@ char **env_array_user_default(const char *username, int timeout, int mode)
 	ufds.events = POLLIN;
 
 	/* Read all of the output from /bin/su into buffer */
+	if ((timeout == 0) && ((timeout = slurm_get_env_timeout()) == 0))
+		timeout = DEFAULT_GET_ENV_TIMEOUT; /* loop derives timeleft */
 	found = 0;
 	buf_read = 0;
 	bzero(buffer, sizeof(buffer));
 	while (1) {
 		gettimeofday(&now, NULL);
-		if (timeout)
-			timeleft = timeout * 1000;
-		else
-			timeleft = SU_WAIT_MSEC;
+		timeleft = timeout * 1000;
 		timeleft -= (now.tv_sec -  begin.tv_sec)  * 1000;
 		timeleft -= (now.tv_usec - begin.tv_usec) / 1000;
 		if (timeleft <= 0) {
@@ -1394,7 +1409,7 @@ char **env_array_user_default(const char *username, int timeout, int mode)
 	close(fildes[0]);
 	if (!found) {
 		error("Failed to load current user environment variables");
-		_load_env_cache(username);
+		return _load_env_cache(username);
 	}
 
 	/* First look for the start token in the output */
@@ -1424,15 +1439,16 @@ char **env_array_user_default(const char *username, int timeout, int mode)
 			break;
 		}
 		if (_env_array_entry_splitter(line, name, sizeof(name), 
-					      value, sizeof(value)))
+					      value, sizeof(value)) &&
+		    (!_discard_env(name, value)))
 			env_array_overwrite(&env, name, value);
 		line = strtok_r(NULL, "\n", &last);
 	}
 	if (!found) {
 		error("Failed to get all user environment variables");
+		env_array_free(env);
 		return _load_env_cache(username);
 	}
 
 	return env;
 }
-
diff --git a/src/common/env.h b/src/common/env.h
index 94000ed24..3f8901d66 100644
--- a/src/common/env.h
+++ b/src/common/env.h
@@ -116,6 +116,7 @@ void env_array_for_job(char ***dest,
  *	SLURM_JOB_NODELIST
  *	SLURM_JOB_CPUS_PER_NODE
  *	ENVIRONMENT=BATCH
+ *	HOSTNAME
  *	LOADLBATCH (AIX only)
  *
  * Sets OBSOLETE variables:
@@ -125,7 +126,9 @@ void env_array_for_job(char ***dest,
  *	SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node
  *	? probably only needed for users...
  */
-void env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch);
+extern void env_array_for_batch_job(char ***dest, 
+				    const batch_job_launch_msg_t *batch,
+				    const char* node_name);
 
 /*
  * Set in "dest the environment variables relevant to a SLURM job step,
diff --git a/src/common/hostlist.c b/src/common/hostlist.c
index 36b489af1..19f340942 100644
--- a/src/common/hostlist.c
+++ b/src/common/hostlist.c
@@ -1,5 +1,5 @@
 /*****************************************************************************\
- *  $Id: hostlist.c 12632 2007-11-06 23:27:07Z da $
+ *  $Id: hostlist.c 13270 2008-02-14 19:40:44Z da $
  *****************************************************************************
  *  $LSDId: hostlist.c,v 1.14 2003/10/14 20:11:54 grondo Exp $
  *****************************************************************************
@@ -1558,7 +1558,8 @@ error:
  * RET 1 if str contained a valid number or range,
  *	0 if conversion of str to a range failed.
  */
-static int _parse_box_range(char *str, struct _range *ranges, int len, int *count)
+static int _parse_box_range(char *str, struct _range *ranges,
+			    int len, int *count)
 {
 	int a[3], b[3], i1, i2, i;
 	char new_str[8];
@@ -2411,14 +2412,28 @@ _get_boxes(char *buf, int max_len)
 				start_box = i;
 			end_box = i;
 		}
+
+
 		if (((len+8) < max_len) && (start_box != -1)
 		    && ((is_box == 0) || (i == axis_max_x))) {
-			sprintf(buf+len,"%c%c%cx%c%c%c,",
-				alpha_num[start_box], alpha_num[axis_min_y],
-				alpha_num[axis_min_z],
-				alpha_num[end_box], alpha_num[axis_max_y],
-				alpha_num[axis_max_z]);
-			len += 8;
+			if(start_box == end_box
+			   && axis_min_y == axis_max_y
+			   && axis_min_z == axis_max_z) {
+				sprintf(buf+len,"%c%c%c,",
+					alpha_num[start_box],
+					alpha_num[axis_min_y],
+					alpha_num[axis_min_z]);
+				len += 4;
+			} else {
+				sprintf(buf+len,"%c%c%cx%c%c%c,",
+					alpha_num[start_box],
+					alpha_num[axis_min_y],
+					alpha_num[axis_min_z],
+					alpha_num[end_box], 
+					alpha_num[axis_max_y],
+					alpha_num[axis_max_z]);
+				len += 8;
+			}
 			start_box = -1;
 			end_box = -1;
 		}
diff --git a/src/common/node_select.c b/src/common/node_select.c
index 2f9a57d0c..ede1c38fc 100644
--- a/src/common/node_select.c
+++ b/src/common/node_select.c
@@ -9,7 +9,7 @@
  *  the plugin. This is because functions required by the plugin can not be 
  *  resolved on the front-end nodes, so we can't load the plugins there.
  *
- *  $Id: node_select.c 12627 2007-11-06 19:48:55Z jette $
+ *  $Id: node_select.c 13270 2008-02-14 19:40:44Z da $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1114,16 +1114,20 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
 			sprintf(start_char, "None");
 		else {
 			snprintf(start_char, sizeof(start_char), 
-				"%1ux%1ux%1u", jobinfo->start[0],
-				jobinfo->start[1], jobinfo->start[2]);
+				"%cx%cx%c",
+				 alpha_num[jobinfo->start[0]],
+				 alpha_num[jobinfo->start[1]],
+				 alpha_num[jobinfo->start[2]]);
 		} 
 		snprintf(buf, size, 
-			 "%7.7s %6.6s %6.6s %9s    %1ux%1ux%1u %5s %-16s",
+			 "%7.7s %6.6s %6.6s %9s    %cx%cx%c %5s %-16s",
 			 _job_conn_type_string(jobinfo->conn_type),
 			 _yes_no_string(jobinfo->reboot),
 			 _yes_no_string(jobinfo->rotate),
 			 max_procs_char,
-			 geometry[0], geometry[1], geometry[2],
+			 alpha_num[geometry[0]],
+			 alpha_num[geometry[1]],
+			 alpha_num[geometry[2]],
 			 start_char, jobinfo->bg_block_id);
 		break;
 	case SELECT_PRINT_MIXED:
@@ -1137,18 +1141,22 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
 			sprintf(start_char, "None");
 		else {
 			snprintf(start_char, sizeof(start_char),
-				"%1ux%1ux%1u", jobinfo->start[0],
-				jobinfo->start[1], jobinfo->start[2]);
+				"%cx%cx%c",
+				 alpha_num[jobinfo->start[0]],
+				 alpha_num[jobinfo->start[1]],
+				 alpha_num[jobinfo->start[2]]);
 		}
 		
 		snprintf(buf, size, 
 			 "Connection=%s Reboot=%s Rotate=%s MaxProcs=%s "
-			 "Geometry=%1ux%1ux%1u Start=%s Block_ID=%s",
+			 "Geometry=%cx%cx%c Start=%s Block_ID=%s",
 			 _job_conn_type_string(jobinfo->conn_type),
 			 _yes_no_string(jobinfo->reboot),
 			 _yes_no_string(jobinfo->rotate),
 			 max_procs_char,
-			 geometry[0], geometry[1], geometry[2],
+			 alpha_num[geometry[0]],
+			 alpha_num[geometry[1]],
+			 alpha_num[geometry[2]],
 			 start_char, jobinfo->bg_block_id);
 		break;
 	case SELECT_PRINT_BG_ID:
@@ -1167,16 +1175,20 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo,
 			 _yes_no_string(jobinfo->rotate));
 		break;
 	case SELECT_PRINT_GEOMETRY:
-		snprintf(buf, size, "%1ux%1ux%1u",
-			 geometry[0], geometry[1], geometry[2]);
+		snprintf(buf, size, "%cx%cx%c",
+			 alpha_num[geometry[0]],
+			 alpha_num[geometry[1]],
+			 alpha_num[geometry[2]]);
 		break;
 	case SELECT_PRINT_START:
 		if (jobinfo->start[0] == (uint16_t) NO_VAL)
 			sprintf(buf, "None");
 		else {
 			snprintf(buf, size, 
-				 "%1ux%1ux%1u", jobinfo->start[0],
-				 jobinfo->start[1], jobinfo->start[2]);
+				 "%cx%cx%c",
+				 alpha_num[jobinfo->start[0]],
+				 alpha_num[jobinfo->start[1]],
+				 alpha_num[jobinfo->start[2]]);
 		} 
 	case SELECT_PRINT_MAX_PROCS:
 		if (jobinfo->max_procs == NO_VAL)
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 01976c812..79d44156d 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -140,6 +140,7 @@ s_p_options_t slurm_conf_options[] = {
 	{"JobCredentialPrivateKey", S_P_STRING},
 	{"JobCredentialPublicCertificate", S_P_STRING},
 	{"JobFileAppend", S_P_UINT16},
+	{"GetEnvTimeout", S_P_UINT16},
 	{"KillTree", S_P_UINT16, defunct_option},
 	{"KillWait", S_P_UINT16},
 	{"MailProg", S_P_STRING},
@@ -315,7 +316,7 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type,
 
 		if (!s_p_get_uint32(&n->tmp_disk, "TmpDisk", tbl)
 		    && !s_p_get_uint32(&n->tmp_disk, "TmpDisk", dflt))
-			n->tmp_disk = 1;
+			n->tmp_disk = 0;
 
 		if (!s_p_get_uint32(&n->weight, "Weight", tbl)
 		    && !s_p_get_uint32(&n->weight, "Weight", dflt))
@@ -456,6 +457,13 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type,
 		if (!s_p_get_string(&p->nodes, "Nodes", tbl)
 		    && !s_p_get_string(&p->nodes, "Nodes", dflt))
 			p->nodes = NULL;
+		else {	/* spaces -> commas; cast keeps isspace() defined */
+			int i;
+			for (i=0; p->nodes[i]; i++) {
+				if (isspace((unsigned char) p->nodes[i]))
+					p->nodes[i] = ',';
+			}
+		}
 
 		if (!s_p_get_boolean(&p->root_only_flag, "RootOnly", tbl)
 		    && !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt))
@@ -1432,6 +1440,9 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl)
 	if (!s_p_get_uint16(&conf->job_file_append, "JobFileAppend", hashtbl))
 		conf->job_file_append = 0;
 
+	if (!s_p_get_uint16(&conf->get_env_timeout, "GetEnvTimeout", hashtbl))
+		conf->get_env_timeout = DEFAULT_GET_ENV_TIMEOUT;
+
 	if (!s_p_get_uint16(&conf->kill_wait, "KillWait", hashtbl))
 		conf->kill_wait = DEFAULT_KILL_WAIT;
 
diff --git a/src/common/read_config.h b/src/common/read_config.h
index 6817fe34a..70215d199 100644
--- a/src/common/read_config.h
+++ b/src/common/read_config.h
@@ -52,6 +52,7 @@ extern char *default_plugstack;
 #define DEFAULT_CACHE_GROUPS        0
 #define DEFAULT_FAST_SCHEDULE       1
 #define DEFAULT_FIRST_JOB_ID        1
+#define DEFAULT_GET_ENV_TIMEOUT     2
 /* NOTE: DEFAULT_INACTIVE_LIMIT must be 0 for Blue Gene/L systems */
 #define DEFAULT_INACTIVE_LIMIT      0
 #define DEFAULT_JOB_ACCT_LOGFILE    "/var/log/slurm_accounting.log"
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 550e85514..d8d87d88c 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -182,6 +182,20 @@ void slurm_api_clear_config(void)
 /* 	slurm_mutex_lock(&config_lock); */
 /* } */
 
+/* slurm_get_env_timeout
+ * RET int - get_env_timeout value read from the locked slurm.conf
+ *	data, used as the srun/sbatch --get-user-env timeout */
+int inline slurm_get_env_timeout(void)
+{
+	slurm_ctl_conf_t *ctl_conf = slurm_conf_lock();
+	int secs = ctl_conf->get_env_timeout;
+
+	/* value copied out above, so the lock can be dropped
+	 * before returning */
+	slurm_conf_unlock();
+	return secs;
+}
+
 /* slurm_get_mpi_default
  * get default mpi value from slurmctld_conf object
  * RET char *   - mpi default value from slurm.conf,  MUST be xfreed by caller
diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h
index bd5614c56..10c19e124 100644
--- a/src/common/slurm_protocol_api.h
+++ b/src/common/slurm_protocol_api.h
@@ -96,6 +96,11 @@ int inline slurm_set_api_config(slurm_protocol_config_t * protocol_conf);
  */
 inline slurm_protocol_config_t *slurm_get_api_config();
 
+/* slurm_get_env_timeout
+ * return default timeout for srun/sbatch --get-user-env option
+ */
+int inline slurm_get_env_timeout(void);
+
 /* slurm_get_mpi_default
  * get default mpi value from slurmctld_conf object
  * RET char *   - mpi default value from slurm.conf,  MUST be xfreed by caller
diff --git a/src/common/slurm_step_layout.c b/src/common/slurm_step_layout.c
index 555f7d65f..e49e01dc4 100644
--- a/src/common/slurm_step_layout.c
+++ b/src/common/slurm_step_layout.c
@@ -679,12 +679,12 @@ static int _task_layout_plane(slurm_step_layout_t *step_layout,
 		return SLURM_ERROR;
 	}
 
+#if(0)
+	/* debugging only */
 	for (i=0; i < step_layout->node_cnt; i++) {
 		info("tasks[%d]: %u", i, step_layout->tasks[i]);
 	}
 	
-#if(0)
-	/* debugging only */
 	for (i=0; i < step_layout->node_cnt; i++) {
 		info ("Host %d _plane_ # of tasks %u", i, step_layout->tasks[i]);
 		for (j=0; j<step_layout->tasks[i]; j++) {
diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c
index ac07bedab..38f1f5d8e 100644
--- a/src/plugins/mpi/mvapich/mvapich.c
+++ b/src/plugins/mpi/mvapich/mvapich.c
@@ -320,8 +320,12 @@ static int mvapich_read_n (mvapich_state_t *st,  struct mvapich_info *mvi,
 			return (-1);
 		}
 
-		if (n == 0) /* unexpected EOF */
+		if (n == 0) { /* unexpected EOF */
+			error ("mvapich: rank %d: "
+			       "Unexpected EOF (%dB left to read)", 
+			       mvi->rank, nleft);
 			return (-1);
+		}
 
 		nleft -= n;
 		p += n;
@@ -554,52 +558,55 @@ static void mvapich_bcast_hostids (mvapich_state_t *st)
 }
 
 /* Write size bytes from buf into socket for rank */
-static void mvapich_send (mvapich_state_t *st, void* buf, int size, int rank)
+static int mvapich_send (mvapich_state_t *st, void* buf, int size, int rank)
 {
 	struct mvapich_info *mvi = st->mvarray [rank];
-	if (mvapich_write_n (st, mvi, buf, size) < 0)
-		error ("mvapich: write hostid rank %d: %m", mvi->rank);
+	return (mvapich_write_n (st, mvi, buf, size));
 }
 
 /* Read size bytes from socket for rank into buf */
-static void mvapich_recv (mvapich_state_t *st, void* buf, int size, int rank)
+static int mvapich_recv (mvapich_state_t *st, void* buf, int size, int rank)
 {
 	struct mvapich_info *mvi = st->mvarray [rank];
-	if (mvapich_read_n (st, mvi, buf, size) <= 0) 
-		error("mvapich reading from %d: %m", mvi->rank);
-}
-
-/* Read an integer from socket for rank */
-static int mvapich_recv_int (mvapich_state_t *st, int rank)
-{
-	int buf;
-	mvapich_recv(st, &buf, sizeof(buf), rank);
-	return buf;
+	return (mvapich_read_n (st, mvi, buf, size)); 
 }
 
 /* Scatter data in buf to ranks using chunks of size bytes */
-static void mvapich_scatterbcast (mvapich_state_t *st, void* buf, int size)
+static int mvapich_scatterbcast (mvapich_state_t *st, void* buf, int size)
 {
-	int i;
-	for (i = 0; i < st->nprocs; i++)
-		mvapich_send(st, buf + i*size, size, i);
+	int i, rc;
+	int n = 0;
+
+	for (i = 0; i < st->nprocs; i++) {
+		if ((rc = mvapich_send (st, buf + i*size, size, i)) <= 0) 
+			return (-1);
+		n += rc;
+	}
+	return (n);
 }
 
 /* Broadcast buf to each rank, which is size bytes big */
-static void mvapich_allgatherbcast (mvapich_state_t *st, void* buf, int size)
+static int mvapich_allgatherbcast (mvapich_state_t *st, void* buf, int size)
 {
-	int i;
-	for (i = 0; i < st->nprocs; i++)
-		mvapich_send(st, buf, size, i);
+	int i, rc;
+	int n = 0;
+
+	for (i = 0; i < st->nprocs; i++) {
+		if ((rc = mvapich_send (st, buf, size, i)) <= 0)
+			return (-1);
+		n += rc;
+	}
+	return (n);
 }
 
 /* Perform alltoall using data in buf with elements of size bytes */
-static void mvapich_alltoallbcast (mvapich_state_t *st, void* buf, int size)
+static int mvapich_alltoallbcast (mvapich_state_t *st, void* buf, int size)
 {
 	int pbufsize = size * st->nprocs;
 	void* pbuf = xmalloc(pbufsize);	
+	int i, src, rc = 0;	/* rc is read after the loop, even if empty */
+	int n = 0;
 
-	int i, src;
 	for (i = 0; i < st->nprocs; i++) {
 		for (src = 0; src < st->nprocs; src++) {
 			memcpy( pbuf + size*src,
@@ -607,22 +614,141 @@ static void mvapich_alltoallbcast (mvapich_state_t *st, void* buf, int size)
 				size
 				);
 		}
-		mvapich_send(st, pbuf, pbufsize, i);
+		if ((rc = mvapich_send (st, pbuf, pbufsize, i)) <= 0)
+			goto out;
+		n += rc;
 	}
 	
+    out:
 	xfree(pbuf);
+	return (rc < 0 ? rc : n);
+}
+
+static int recv_common_value (mvapich_state_t *st, int *valp, int rank)
+{
+	int val;
+	if (mvapich_recv (st, &val, sizeof (int), rank) <= 0) {
+		error ("mvapich: recv: rank %d: %m\n", rank);
+		return (-1);
+	}
+
+	/*
+	 *  If value is uninitialized, set it to current value,
+	 *   otherwise ensure that current value matches previous
+	 */
+	if (*valp == -1)
+		*valp = val;
+	else if (val != *valp) {
+		error ("mvapich: PMGR: unexpected value from rank %d: "
+		       "expected %d, recvd %d", rank, *valp, val);
+		return (-1);
+	}
+	return (0);
+}
+
+/* 
+ * PMGR_BCAST (root, size of message, then message data (from root only))
+ */
+static int process_pmgr_bcast (mvapich_state_t *st, int *rootp, int *sizep, 
+		void ** bufp, int rank)
+{
+	if (recv_common_value (st, rootp, rank) < 0)
+		return (-1);
+	if (recv_common_value (st, sizep, rank) < 0)
+		return (-1);
+	if (rank != *rootp)
+		return (0);
+
+	/* 
+	 *  Recv data from root 
+	 */
+	*bufp = xmalloc (*sizep);
+	if (mvapich_recv (st, *bufp, *sizep, rank) < 0) {
+		error ("mvapich: PMGR_BCAST: Failed to recv from root: %m");
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * PMGR_GATHER (root, size of message, then message data)
+ */
+static int process_pmgr_gather (mvapich_state_t *st, int *rootp, 
+		int *sizep, void **bufp, int rank)
+{
+	if (recv_common_value (st, rootp, rank) < 0)
+		return (-1);
+	if (recv_common_value (st, sizep, rank) < 0)
+		return (-1);
+	if (*bufp == NULL)
+		*bufp = xmalloc (*sizep * st->nprocs);
+		
+	if (mvapich_recv(st, (*bufp) + (*sizep)*rank, *sizep, rank) < 0) {
+		error ("mvapich: PMGR_GATHER: rank %d: recv: %m", rank);
+		return (-1);
+	}
+	return (0);
 }
 
-/* Check that new == curr value if curr has been initialized */
-static int set_current (int curr, int new)
+/*
+ * PMGR_SCATTER (root, size of message, then message data)
+ */
+static int process_pmgr_scatter (mvapich_state_t *st, int *rootp, 
+		int *sizep, void **bufp, int rank)
 {
-	if (curr == -1)
-		curr = new;
-	if (new != curr) {
-		error("PMGR unexpected value: received %d, expecting %d", 
-			new, curr);
+	if (recv_common_value (st, rootp, rank) < 0)
+		return (-1);
+	if (recv_common_value (st, sizep, rank) < 0)
+		return (-1);
+	if (rank != *rootp)
+		return (0);
+
+	if (*bufp == NULL)
+		*bufp = xmalloc (*sizep * st->nprocs);
+		
+	if (mvapich_recv(st, *bufp, (*sizep) * st->nprocs, rank) < 0) {
+		error ("mvapich: PMGR_SCATTER: rank %d: recv: %m", rank);
+		return (-1);
 	}
-	return curr;
+	return (0);
+}
+
+/*
+ * PMGR_ALLGATHER (size of message, then message data)
+ */
+static int process_pmgr_allgather (mvapich_state_t *st, int *sizep, 
+		void **bufp, int rank)
+{
+	if (recv_common_value (st, sizep, rank) < 0)
+		return (-1);
+	if (*bufp == NULL)
+		*bufp = xmalloc (*sizep * st->nprocs);
+	if (mvapich_recv (st, (*bufp) + *sizep*rank, *sizep, rank) < 0) {
+		error ("mvapich: PMGR_ALLGATHER: rank %d: %m", rank);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * PMGR_ALLTOALL (size of message, then message data)
+ */
+static int process_pmgr_alltoall (mvapich_state_t *st, int *sizep, 
+		void **bufp, int rank)
+{
+	if (recv_common_value (st, sizep, rank) < 0)
+		return (-1);
+
+	if (*bufp == NULL)
+		*bufp = xmalloc (*sizep * st->nprocs * st->nprocs);
+	if (mvapich_recv ( st, 
+	                   *bufp + (*sizep * st->nprocs)*rank,
+	                   *sizep * st->nprocs, rank ) < 0) {
+		error ("mvapich: PMGR_ALLTOALL: recv: rank %d: %m", rank);
+		return (-1);
+	}
+
+	return (0);
 }
 
 /* 
@@ -643,7 +769,7 @@ static int set_current (int curr, int new)
  * Note: Although there are op codes available for PMGR_OPEN and
  * PMGR_ABORT, neither is fully implemented and should not be used.
  */
-static void mvapich_processops (mvapich_state_t *st)
+static int mvapich_processops (mvapich_state_t *st)
 {
 	/* Until a 'CLOSE' or 'ABORT' message is seen, we continuously 
 	 *  loop processing ops
@@ -663,57 +789,57 @@ static void mvapich_processops (mvapich_state_t *st)
 		struct mvapich_info *mvi = st->mvarray [i];
 
 		// read in opcode
-		opcode = set_current(opcode, mvapich_recv_int(st, i));
+		if (recv_common_value (st, &opcode, i) < 0) {
+			error ("mvapich: rank %d: Failed to read opcode: %m", 
+				mvi->rank);
+			return (-1);
+		}
 
 		// read in additional data depending on current opcode
 		int rank, code;
 		switch(opcode) {
 		case 0: // PMGR_OPEN (followed by rank)
-			rank = mvapich_recv_int(st, i);
+			if (mvapich_recv (st, &rank, sizeof (int), i) <= 0) {
+				error ("mvapich: PMGR_OPEN: recv: %m");
+				exit = 1;
+			}
 			break;
 		case 1: // PMGR_CLOSE (no data, close the socket)
 			close(mvi->fd);
 			break;
 		case 2: // PMGR_ABORT (followed by exit code)
-			code = mvapich_recv_int(st, i);
+			if (mvapich_recv (st, &code, sizeof (int), i) <= 0) {
+				error ("mvapich: PMGR_ABORT: recv: %m");
+			}
 			error("mvapich abort with code %d from rank %d", 
 				code, i);
 			break;
 		case 3: // PMGR_BARRIER (no data)
 			break;
-		case 4: // PMGR_BCAST (root, size of message, 
-			// then message data (from root only))
-			root = set_current(root, mvapich_recv_int(st, i));
-			size = set_current(size, mvapich_recv_int(st, i));
-			if (!buf) buf = (void*) xmalloc(size);
-			if (i == root) mvapich_recv(st, buf, size, i);
+		case 4: // PMGR_BCAST
+			if (process_pmgr_bcast (st, &root, &size, &buf, i) < 0)
+				return (-1);
 			break;
-		case 5: // PMGR_GATHER (root, size of message, 
-			// then message data)
-			root = set_current(root, mvapich_recv_int(st, i));
-			size = set_current(size, mvapich_recv_int(st, i));
-			if (!buf) buf = (void*) xmalloc(size * st->nprocs);
-			mvapich_recv(st, buf + size*i, size, i);
+		case 5: // PMGR_GATHER 
+			if (process_pmgr_gather (st, &root, &size, &buf, i) < 0)
+				return (-1);
 			break;
-		case 6: // PMGR_SCATTER (root, size of message, 
-			// then message data)
-			root = set_current(root, mvapich_recv_int(st, i));
-			size = set_current(size, mvapich_recv_int(st, i));
-			if (!buf) buf = (void*) xmalloc(size * st->nprocs);
-			if (i == root) mvapich_recv(st, buf, size * st->nprocs, i);
+		case 6: // PMGR_SCATTER 
+			if (process_pmgr_scatter (st, &root, 
+			                          &size, &buf, i) < 0)
+				return (-1);
 			break;
-		case 7: // PMGR_ALLGATHER (size of message, then message data)
-			size = set_current(size, mvapich_recv_int(st, i));
-			if (!buf) buf = (void*) xmalloc(size * st->nprocs);
-			mvapich_recv(st, buf + size*i, size, i);
+		case 7: // PMGR_ALLGATHER 
+			if (process_pmgr_allgather (st, &size, &buf, i) < 0)
+				return (-1);
 			break;
-		case 8: // PMGR_ALLTOALL (size of message, then message data)
-			size = set_current(size, mvapich_recv_int(st, i));
-			if (!buf) buf = (void*) xmalloc(size * st->nprocs * st->nprocs);
-			mvapich_recv(st, buf + (size*st->nprocs)*i, size * st->nprocs, i);
+		case 8: // PMGR_ALLTOALL 
+			if (process_pmgr_alltoall (st, &size, &buf, i) < 0)
+				return (-1);
 			break;
 		default:
 			error("Unrecognized PMGR opcode: %d", opcode);
+			return (-1);
 		}
 	}
 
@@ -767,6 +893,7 @@ static void mvapich_processops (mvapich_state_t *st)
 	xfree(buf);
   } // while(!exit)
   mvapich_debug ("Completed processing PMGR opcodes");
+  return (0);
 }
 
 static void mvapich_bcast (mvapich_state_t *st)
@@ -1158,7 +1285,8 @@ again:
 	}
 
 	if (st->protocol_version == 8) {
-		mvapich_processops(st);
+		if (mvapich_processops(st) < 0)
+			goto fail;
 	} else {
 		mvapich_debug ("bcasting mvapich info to %d tasks", st->nprocs);
 		mvapich_bcast (st);
diff --git a/src/plugins/sched/wiki/get_jobs.c b/src/plugins/sched/wiki/get_jobs.c
index 70c9d91aa..6c6fd1f62 100644
--- a/src/plugins/sched/wiki/get_jobs.c
+++ b/src/plugins/sched/wiki/get_jobs.c
@@ -301,8 +301,14 @@ static char *	_dump_job(struct job_record *job_ptr, int state_info)
 	}
 
 	if (job_ptr->account) {
-		snprintf(tmp, sizeof(tmp),
-			"ACCOUNT=%s;", job_ptr->account);
+		/* allow QOS spec in form "qos-name" */
+		if (!strncmp(job_ptr->account,"qos-",4)) {
+			snprintf(tmp, sizeof(tmp),
+				 "QOS=%s;", job_ptr->account + 4);
+		} else {
+			snprintf(tmp, sizeof(tmp),
+				"ACCOUNT=%s;", job_ptr->account);
+		}
 		xstrcat(buf, tmp);
 	}
 
diff --git a/src/plugins/sched/wiki/start_job.c b/src/plugins/sched/wiki/start_job.c
index f7cdd46d1..180e79742 100644
--- a/src/plugins/sched/wiki/start_job.c
+++ b/src/plugins/sched/wiki/start_job.c
@@ -195,6 +195,7 @@ static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
 	 * performs many string compares. */
 	xfree(job_ptr->details->req_node_layout);
 	if (task_cnt && cr_enabled) {
+		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
 		job_ptr->details->req_node_layout = (uint16_t *)
 			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
 		bsize = bit_size(new_bitmap);
@@ -212,7 +213,8 @@ static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
 					if ((node_idx[node_name_len] == ',') ||
 				 	    (node_idx[node_name_len] == '\0')) {
 						job_ptr->details->
-							req_node_layout[ll]++;
+							req_node_layout[ll] +=
+							cpus_per_task;
 					}
 					node_cur = strchr(node_idx, ',');
 					if (node_cur)
diff --git a/src/plugins/sched/wiki2/start_job.c b/src/plugins/sched/wiki2/start_job.c
index d6fd7dd82..564919f93 100644
--- a/src/plugins/sched/wiki2/start_job.c
+++ b/src/plugins/sched/wiki2/start_job.c
@@ -251,6 +251,7 @@ static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
 	 * performs many string compares. */
 	xfree(job_ptr->details->req_node_layout);
 	if (task_cnt && cr_enabled) {
+		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
 		job_ptr->details->req_node_layout = (uint16_t *)
 			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
 		bsize = bit_size(new_bitmap);
@@ -268,7 +269,8 @@ static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
 					if ((node_idx[node_name_len] == ',') ||
 				 	    (node_idx[node_name_len] == '\0')) {
 						job_ptr->details->
-							req_node_layout[ll]++;
+							req_node_layout[ll] +=
+							cpus_per_task;
 					}
 					node_cur = strchr(node_idx, ',');
 					if (node_cur)
diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c
index f8cc7dc09..9c0467374 100644
--- a/src/plugins/select/bluegene/block_allocator/block_allocator.c
+++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  block_allocator.c - Assorted functions for layout of bglblocks, 
  *	 wiring, mapping for smap, etc.
- *  $Id: block_allocator.c 12543 2007-10-23 22:19:49Z jette $
+ *  $Id: block_allocator.c 13150 2008-01-31 22:59:13Z da $
  *****************************************************************************
  *  Copyright (C) 2004 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1264,6 +1264,7 @@ extern int copy_node_path(List nodes, List dest_nodes)
 #endif	
 	return rc;
 }
+
 extern int check_and_set_node_list(List nodes)
 {
 	int rc = SLURM_ERROR;
diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.h b/src/plugins/select/bluegene/block_allocator/block_allocator.h
index cc69f7d35..91fe58747 100644
--- a/src/plugins/select/bluegene/block_allocator/block_allocator.h
+++ b/src/plugins/select/bluegene/block_allocator/block_allocator.h
@@ -109,6 +109,7 @@ typedef struct {
 	bool rotate;
 	bool elongate; 
 	List elongate_geos;
+	bitstr_t *avail_node_bitmap;	/* pointer to available nodes */	
 } ba_request_t; 
 
 typedef struct {
@@ -172,7 +173,7 @@ typedef struct
  * ba_node_t: node within the allocation system.
  */
 typedef struct {
-	/* set if using this node in a block*/
+	/* set if using this node in a block */
 	bool used;
 
 	/* coordinates */
diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c
index f03eae075..30d60327b 100644
--- a/src/plugins/select/bluegene/plugin/bg_job_place.c
+++ b/src/plugins/select/bluegene/plugin/bg_job_place.c
@@ -2,7 +2,7 @@
  *  bg_job_place.c - blue gene job placement (e.g. base block selection)
  *  functions.
  *
- *  $Id: bg_job_place.c 12627 2007-11-06 19:48:55Z jette $ 
+ *  $Id: bg_job_place.c 13271 2008-02-14 20:02:00Z da $ 
  *****************************************************************************
  *  Copyright (C) 2004-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -209,7 +209,6 @@ static int _find_best_block_match(struct job_record* job_ptr,
 	uint32_t req_procs = job_ptr->num_procs;
 	uint32_t proc_cnt;
 	ba_request_t request; 
-	ba_request_t *try_request = NULL; 
 	int i;
 	int rot_cnt = 0;
 	int created = 0;
@@ -256,50 +255,7 @@ static int _find_best_block_match(struct job_record* job_ptr,
 		
 	if(start[X] != (uint16_t)NO_VAL)
 		start_req = 1;
-	if(num_unused_cpus != total_cpus) {
-		/* 
-		   see if we have already tried to create this 
-		   size but couldn't make it right now no reason 
-		   to try again 
-		*/
-		slurm_mutex_lock(&request_list_mutex);
-		itr = list_iterator_create(bg_request_list);
-		while ((try_request = list_next(itr))) {
-			if(start_req) {
-				if ((try_request->start[X] != start[X])
-				    || (try_request->start[Y] != start[Y])
-				    || (try_request->start[Z] != start[Z])) {
-					debug4("got %c%c%c looking for %c%c%c",
-					       alpha_num[try_request->start[X]],
-					       alpha_num[try_request->start[Y]],
-					       alpha_num[try_request->start[Z]],
-					       alpha_num[start[X]],
-					       alpha_num[start[Y]],
-					       alpha_num[start[Z]]);
-					continue;
-				}
-				debug3("found %c%c%c looking for %c%c%c",
-				       alpha_num[try_request->start[X]],
-				       alpha_num[try_request->start[Y]],
-				       alpha_num[try_request->start[Z]],
-				       alpha_num[start[X]],
-				       alpha_num[start[Y]],
-				       alpha_num[start[Z]]);
-			}
-			if(try_request->procs == req_procs) {
-				debug("already tried to create but "
-				      "can't right now.");
-				list_iterator_destroy(itr);
-				slurm_mutex_unlock(&request_list_mutex);
-				if(test_only)
-					return SLURM_SUCCESS;
-				else
-					return SLURM_ERROR;
-			}				
-		}
-		list_iterator_destroy(itr);
-		slurm_mutex_unlock(&request_list_mutex);
-	}
+
 	select_g_get_jobinfo(job_ptr->select_jobinfo,
 			     SELECT_DATA_CONN_TYPE, &conn_type);
 	select_g_get_jobinfo(job_ptr->select_jobinfo,
@@ -471,8 +427,8 @@ try_again:
 		      req_procs, max_procs, proc_cnt);
 		if ((proc_cnt < req_procs)
 		    || ((max_procs != NO_VAL) && (proc_cnt > max_procs))) {
+			/* We use the processor count per block here
-			   mostly to see if we can run on a smaller partition. 
+			/* We use the proccessor count per block here
+			   mostly to see if we can run on a smaller block. 
 			 */
 			convert_num_unit((float)proc_cnt, tmp_char, 
 					 sizeof(tmp_char), UNIT_NONE);
@@ -522,7 +478,7 @@ try_again:
 			continue;
 		}
 				
-		/* Make sure no other partitions are under this partition 
+		/* Make sure no other blocks under this block
 		   are booted and running jobs
 		*/
 		itr2 = list_iterator_create(bg_list);
@@ -719,7 +675,12 @@ try_again:
 		request.linuximage = linuximage;
 		request.mloaderimage = mloaderimage;
 		request.ramdiskimage = ramdiskimage;
-	
+		if(job_ptr->details->req_node_bitmap) 
+			request.avail_node_bitmap = 
+				job_ptr->details->req_node_bitmap;
+		else
+			request.avail_node_bitmap = slurm_block_bitmap;
+
 		debug("trying with all free blocks");
 		if(create_dynamic_block(&request, NULL) == SLURM_ERROR) {
 			error("this job will never run on "
@@ -735,24 +696,6 @@ try_again:
 				goto end_it;
 			} 
 
-			/* 
-			   add request to list so we don't try again until 
-			   something happens like a job finishing or 
-			   something so we can try again 
-			*/
-			debug3("adding %d %d", 
-			       request.procs, request.conn_type);
-			try_request = xmalloc(sizeof(ba_request_t));
-			try_request->procs = req_procs;
-			try_request->save_name = NULL;
-			try_request->elongate_geos = NULL;
-			try_request->start_req = request.start_req;
-			for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) 
-				try_request->start[i] = start[i];
-			slurm_mutex_lock(&request_list_mutex);
-			list_push(bg_request_list, try_request);
-			slurm_mutex_unlock(&request_list_mutex);
-		
 			slurm_conf_lock();
 			snprintf(tmp_char, sizeof(tmp_char), "%s%s", 
 				 slurmctld_conf.node_prefix,
@@ -812,6 +755,12 @@ try_again:
 			request.linuximage = linuximage;
 			request.mloaderimage = mloaderimage;
 			request.ramdiskimage = ramdiskimage;
+			if(job_ptr->details->req_node_bitmap) 
+				request.avail_node_bitmap = 
+					job_ptr->details->req_node_bitmap;
+			else
+				request.avail_node_bitmap = slurm_block_bitmap;
+			
 			/* 1- try empty space
 			   2- we see if we can create one in the 
 			   unused bps
diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c
index f656c0525..5153de568 100644
--- a/src/plugins/select/bluegene/plugin/bg_job_run.c
+++ b/src/plugins/select/bluegene/plugin/bg_job_run.c
@@ -2,7 +2,7 @@
  *  bg_job_run.c - blue gene job execution (e.g. initiation and termination) 
  *  functions.
  *
- *  $Id: bg_job_run.c 11274 2007-03-30 19:39:49Z da $ 
+ *  $Id: bg_job_run.c 13271 2008-02-14 20:02:00Z da $ 
  *****************************************************************************
  *  Copyright (C) 2004-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -589,9 +589,6 @@ static void _term_agent(bg_update_t *bg_update_ptr)
 		      bg_record->bg_block_id,
 		      bg_record->user_name);
 
-		if(bluegene_layout_mode == LAYOUT_DYNAMIC) 
-			remove_from_request_list();
-		
 		if(job_remove_failed) {
 			char time_str[32];
 			slurm_make_time_str(&now, time_str, sizeof(time_str));
diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c
index c684d32f5..30684bf23 100644
--- a/src/plugins/select/bluegene/plugin/bluegene.c
+++ b/src/plugins/select/bluegene/plugin/bluegene.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  bluegene.c - blue gene node configuration processing module. 
  *
- *  $Id: bluegene.c 12450 2007-10-05 18:22:36Z da $
+ *  $Id: bluegene.c 13271 2008-02-14 20:02:00Z da $
  *****************************************************************************
  *  Copyright (C) 2004 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -59,8 +59,6 @@ List bg_found_block_list = NULL;  	/* found bg blocks already on system */
 List bg_job_block_list = NULL;  	/* jobs running in these blocks */
 List bg_booted_block_list = NULL;  	/* blocks that are booted */
 List bg_freeing_list = NULL;  	        /* blocks that being freed */
-List bg_request_list = NULL;  	        /* list of request that can't 
-					   be made just yet */
 
 List bg_blrtsimage_list = NULL;
 List bg_linuximage_list = NULL;
@@ -80,7 +78,6 @@ uint16_t bridge_api_verb = 0;
 bool agent_fini = false;
 time_t last_bg_update;
 pthread_mutex_t block_state_mutex = PTHREAD_MUTEX_INITIALIZER;
-pthread_mutex_t request_list_mutex = PTHREAD_MUTEX_INITIALIZER;
 int num_block_to_free = 0;
 int num_block_freed = 0;
 int blocks_are_created = 0;
@@ -185,13 +182,6 @@ extern void fini_bg(void)
 	while(destroy_cnt > 0)
 		usleep(1000);
 	
-	slurm_mutex_lock(&request_list_mutex);
-	if (bg_request_list) {
-		list_destroy(bg_request_list);
-		bg_request_list = NULL;
-	}
-	slurm_mutex_unlock(&request_list_mutex);
-		
 	if(bg_blrtsimage_list) {
 		list_destroy(bg_blrtsimage_list);
 		bg_blrtsimage_list = NULL;
@@ -407,6 +397,7 @@ extern void process_nodes(bg_record_t *bg_record)
 					start);
 				if(bg_record->nodes[j] != ',')
 					break;
+				j--;
 			}
 			j++;
 		}
@@ -423,6 +414,8 @@ extern void process_nodes(bg_record_t *bg_record)
 
 	itr = list_iterator_create(bg_record->bg_block_list);
 	while ((ba_node = list_next(itr)) != NULL) {
+		if(!ba_node->used)
+			continue;
 		debug4("%c%c%c is included in this block",
 		       alpha_num[ba_node->coord[X]],
 		       alpha_num[ba_node->coord[Y]],
@@ -1205,6 +1198,89 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list)
 		debug("No list was given");
 	}
 
+	if(request->avail_node_bitmap) {
+		int j=0, number;
+		int x,y,z;
+		char *nodes = NULL;
+		bitstr_t *bitmap = bit_alloc(node_record_count);
+		int start[BA_SYSTEM_DIMENSIONS];
+		int end[BA_SYSTEM_DIMENSIONS];
+
+		/* find the bps that aren't in the available node
+		 * bitmap and mark them as used
+		 */
+		bit_or(bitmap, request->avail_node_bitmap);
+		bit_not(bitmap);
+		nodes = bitmap2node_name(bitmap);
+		//info("not using %s", nodes);
+		while(nodes[j] != '\0') {
+			if ((nodes[j] == '[' || nodes[j] == ',')
+			    && (nodes[j+8] == ']' || nodes[j+8] == ',')
+			    && (nodes[j+4] == 'x' || nodes[j+4] == '-')) {
+				j++;
+				number = xstrntol(nodes + j,
+						  NULL, BA_SYSTEM_DIMENSIONS,
+						  HOSTLIST_BASE);
+				start[X] = number / 
+					(HOSTLIST_BASE * HOSTLIST_BASE);
+				start[Y] = (number % 
+					    (HOSTLIST_BASE * HOSTLIST_BASE))
+					/ HOSTLIST_BASE;
+				start[Z] = (number % HOSTLIST_BASE);
+				j += 4;
+				number = xstrntol(nodes + j,
+						NULL, 3, HOSTLIST_BASE);
+				end[X] = number /
+					(HOSTLIST_BASE * HOSTLIST_BASE);
+				end[Y] = (number 
+					  % (HOSTLIST_BASE * HOSTLIST_BASE))
+					/ HOSTLIST_BASE;
+				end[Z] = (number % HOSTLIST_BASE);
+				j += 3;
+				for (x = start[X]; x <= end[X]; x++) {
+					for (y = start[Y]; y <= end[Y]; y++) {
+						for (z = start[Z]; 
+						     z <= end[Z]; z++) {
+							ba_system_ptr->
+								grid[x]
+#ifdef HAVE_BG
+								[y][z]
+#endif
+								.used = 1;
+						}
+					}
+				}
+				
+				if(nodes[j] != ',')
+					break;
+				j--;
+			} else if((nodes[j] >= '0' && nodes[j] <= '9')
+				  || (nodes[j] >= 'A' && nodes[j] <= 'Z')) {
+				
+				number = xstrntol(nodes + j,
+						  NULL, BA_SYSTEM_DIMENSIONS,
+						  HOSTLIST_BASE);
+				x = number / (HOSTLIST_BASE * HOSTLIST_BASE);
+				y = (number % (HOSTLIST_BASE * HOSTLIST_BASE))
+					/ HOSTLIST_BASE;
+				z = (number % HOSTLIST_BASE);
+				j+=3;
+				ba_system_ptr->grid[x]
+#ifdef HAVE_BG
+					[y][z]
+#endif
+					.used = 1;
+
+				if(nodes[j] != ',') 
+					break;
+				j--;
+			}
+			j++;
+		}
+		xfree(nodes);
+		FREE_NULL_BITMAP(bitmap);
+	}
+
 	if(request->size==1 && request->procs < bluegene_bp_node_cnt) {
 		request->conn_type = SELECT_SMALL;
 		if(request->procs == (procs_per_node/16)) {
@@ -1228,14 +1304,14 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list)
 			}
 			num_quarter=4;
 		}
-		
+
 		if(_breakup_blocks(request, my_block_list) != SLURM_SUCCESS) {
 			debug2("small block not able to be placed");
 			//rc = SLURM_ERROR;
 		} else 
 			goto finished;
 	}
-	
+		
 	if(request->conn_type == SELECT_NAV)
 		request->conn_type = SELECT_TORUS;
 	
@@ -1531,31 +1607,6 @@ extern int remove_from_bg_list(List my_bg_list, bg_record_t *bg_record)
 	return rc;
 }
 
-extern int remove_from_request_list()
-{
-	ba_request_t *try_request = NULL; 
-	ListIterator itr;
-	int rc = SLURM_ERROR;
-
-	/* 
-	   remove all requests out of the list.
-	*/
-		
-	slurm_mutex_lock(&request_list_mutex);
-	itr = list_iterator_create(bg_request_list);
-	while ((try_request = list_next(itr)) != NULL) {
-		debug3("removing size %d", 
-		       try_request->procs);
-		list_remove(itr);
-		delete_ba_request(try_request);
-		//list_iterator_reset(itr);
-		rc = SLURM_SUCCESS;
-	}
-	list_iterator_destroy(itr);
-	slurm_mutex_unlock(&request_list_mutex);
-	return rc;
-}
-
 extern int bg_free_block(bg_record_t *bg_record)
 {
 #ifdef HAVE_BG_FILES
@@ -1705,8 +1756,6 @@ extern void *mult_destroy_block(void *args)
 		 */
 		sort_bg_record_inc_size(bg_freeing_list);
 		
-		remove_from_request_list();
-		
 		slurm_mutex_lock(&block_state_mutex);
 		if(remove_from_bg_list(bg_job_block_list, bg_record) 
 		   == SLURM_SUCCESS) {
@@ -2361,7 +2410,7 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end)
 		      alpha_num[DIM_SIZE[X]], alpha_num[DIM_SIZE[Y]], 
 		      alpha_num[DIM_SIZE[Z]]);
 	}
-	debug3("bluegene.conf: %c%c%cx%c%c%c",
+	debug3("adding bps: %c%c%cx%c%c%c",
 	       alpha_num[start[X]], alpha_num[start[Y]], alpha_num[start[Z]],
 	       alpha_num[end[X]], alpha_num[end[Y]], alpha_num[end[Z]]);
 	debug3("slurm.conf:    %c%c%c",
@@ -2380,6 +2429,7 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end)
 				slurm_conf_unlock();
 				ba_node = ba_copy_node(
 					&ba_system_ptr->grid[x][y][z]);
+				ba_node->used = 1;
 				list_append(bg_record->bg_block_list, ba_node);
 				node_count++;
 			}
@@ -2436,12 +2486,6 @@ static void _set_bg_lists()
 		list_destroy(bg_list);
 	bg_list = list_create(destroy_bg_record);
 
-	slurm_mutex_lock(&request_list_mutex);
-	if(bg_request_list) 
-		list_destroy(bg_request_list);
-	bg_request_list = list_create(delete_ba_request);
-	slurm_mutex_unlock(&request_list_mutex);
-	
 	slurm_mutex_unlock(&block_state_mutex);	
 	
 	if(bg_blrtsimage_list)
@@ -2865,6 +2909,14 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list)
 			continue;
 		if(bg_record->state != RM_PARTITION_FREE)
 			continue;
+		if (request->avail_node_bitmap &&
+		    !bit_super_set(bg_record->bitmap,
+				   request->avail_node_bitmap)) {
+			debug2("bg block %s has nodes not usable by this job",
+			       bg_record->bg_block_id);
+			continue;
+		}
+			
 		if(request->start_req) {
 			if ((request->start[X] != bg_record->start[X])
 			    || (request->start[Y] != bg_record->start[Y])
@@ -2889,7 +2941,7 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list)
 		proc_cnt = bg_record->bp_count * 
 			bg_record->cpus_per_bp;
 		if(proc_cnt == request->procs) {
-			debug2("found it here %s, %s",
+			debug("found it here %s, %s",
 			       bg_record->bg_block_id,
 			       bg_record->nodes);
 			request->save_name = xmalloc(4);
@@ -2961,6 +3013,14 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list)
 	       != NULL) {
 		if(bg_record->job_running != NO_JOB_RUNNING)
 			continue;
+		if (request->avail_node_bitmap &&
+		    !bit_super_set(bg_record->bitmap,
+				   request->avail_node_bitmap)) {
+			debug2("bg block %s has nodes not usable by this job",
+			       bg_record->bg_block_id);
+			continue;
+		}
+
 		if(request->start_req) {
 			if ((request->start[X] != bg_record->start[X])
 			    || (request->start[Y] != bg_record->start[Y])
diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c
index 48752fc99..085543cd2 100644
--- a/src/plugins/select/bluegene/plugin/select_bluegene.c
+++ b/src/plugins/select/bluegene/plugin/select_bluegene.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  select_bluegene.c - node selection plugin for Blue Gene system.
  * 
- *  $Id: select_bluegene.c 12409 2007-09-26 16:32:16Z jette $
+ *  $Id: select_bluegene.c 13271 2008-02-14 20:02:00Z da $
  *****************************************************************************
  *  Copyright (C) 2004-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1085,8 +1085,6 @@ extern int select_p_update_node_state (int index, uint16_t state)
 		}
 	}
 #endif
-	if(bluegene_layout_mode == LAYOUT_DYNAMIC) 
-		remove_from_request_list();
 	return SLURM_ERROR;
 }
 
diff --git a/src/salloc/opt.c b/src/salloc/opt.c
index 80b5435bc..6833d2408 100644
--- a/src/salloc/opt.c
+++ b/src/salloc/opt.c
@@ -669,6 +669,7 @@ void set_options(const int argc, char **argv)
 		{"no-kill",       no_argument,       0, 'k'},
 		{"kill-command",  optional_argument, 0, 'K'},
 		{"tasks",         required_argument, 0, 'n'},
+		{"ntasks",        required_argument, 0, 'n'},
 		{"nodes",         required_argument, 0, 'N'},
 		{"overcommit",    no_argument,       0, 'O'},
 		{"partition",     required_argument, 0, 'p'},
@@ -827,6 +828,9 @@ void set_options(const int argc, char **argv)
 #ifdef HAVE_BG
 			info("\tThe nodelist option should only be used if\n"
 			     "\tthe block you are asking for can be created.\n"
+			     "\tIt should also include all the midplanes you\n"
+			     "\twant to use, partial lists may not\n"
+			     "\twork correctly.\n"
 			     "\tPlease consult smap before using this option\n"
 			     "\tor your job may be stuck with no way to run.");
 #endif
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 82633fa42..d94a24661 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -622,6 +622,7 @@ static struct option long_options[] = {
 	{"immediate",     no_argument,       0, 'I'},
 	{"job-name",      required_argument, 0, 'J'},
 	{"no-kill",       no_argument,       0, 'k'},
+	{"tasks",         required_argument, 0, 'n'},	
 	{"ntasks",        required_argument, 0, 'n'},
 	{"nodes",         required_argument, 0, 'N'},
 	{"output",        required_argument, 0, 'o'},
@@ -1154,6 +1155,9 @@ static void _set_options(int argc, char **argv)
 #ifdef HAVE_BG
 			info("\tThe nodelist option should only be used if\n"
 			     "\tthe block you are asking for can be created.\n"
+			     "\tIt should also include all the midplanes you\n"
+			     "\twant to use, partial lists may not\n"
+			     "\twork correctly.\n"
 			     "\tPlease consult smap before using this option\n"
 			     "\tor your job may be stuck with no way to run.");
 #endif
diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c
index 2fd84f78d..9ad0b2a77 100644
--- a/src/sbatch/sbatch.c
+++ b/src/sbatch/sbatch.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  sbatch.c - Submit a SLURM batch script.
  *
- *  $Id: sbatch.c 12856 2007-12-19 00:18:44Z jette $
+ *  $Id: sbatch.c 13231 2008-02-08 17:16:47Z jette $
  *****************************************************************************
  *  Copyright (C) 2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -216,11 +216,12 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc)
 		struct passwd *pw = NULL;
 		pw = getpwuid(opt.uid);
 		if (pw != NULL) {
-			desc->environment = env_array_user_default(pw->pw_name,
+			desc->environment = env_array_user_default(
+						pw->pw_name,
 						opt.get_user_env_time,
 						opt.get_user_env_mode);
-			/* FIXME - should we abort if j->environment
-			 * is NULL? */
+			if (desc->environment == NULL)
+				exit(1);	/* error already logged */
 		}
 	}
 	env_array_merge(&desc->environment, (const char **)environ);
diff --git a/src/slaunch/opt.c b/src/slaunch/opt.c
index 83658f41f..de1b25d64 100644
--- a/src/slaunch/opt.c
+++ b/src/slaunch/opt.c
@@ -1145,6 +1145,9 @@ void set_options(const int argc, char **argv)
 #ifdef HAVE_BG
 			info("\tThe nodelist option should only be used if\n"
 			     "\tthe block you are asking for can be created.\n"
+			     "\tIt should also include all the midplanes you\n"
+			     "\twant to use, partial lists may not\n"
+			     "\twork correctly.\n"
 			     "\tPlease consult smap before using this option\n"
 			     "\tor your job may be stuck with no way to run.");
 #endif
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 99769cf93..58d26e45f 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  controller.c - main control machine daemon for slurm
- *  $Id: controller.c 13077 2008-01-23 22:31:44Z da $
+ *  $Id: controller.c 13156 2008-02-01 17:43:01Z da $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -842,8 +842,8 @@ static int _gold_mark_all_nodes_down(char *reason, time_t event_time)
 	state_file = xstrdup (slurmctld_conf.state_save_location);
 	xstrcat (state_file, "/node_state");
 	if (stat(state_file, &stat_buf)) {
-		error("_gold_mark_all_nodes_down: could not stat(%s) to record "
-		      "node down time", state_file);
+		debug("_gold_mark_all_nodes_down: could not stat(%s) "
+		      "to record node down time", state_file);
 		xfree(state_file);
 		return rc;
 	}
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 5109f4cf6..923670c92 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3,7 +3,7 @@
  *	Note: there is a global job list (job_list), time stamp 
  *	(last_job_update), and hash table (job_hash)
  *
- *  $Id: job_mgr.c 13083 2008-01-24 16:28:23Z jette $
+ *  $Id: job_mgr.c 13176 2008-02-04 16:56:57Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1064,6 +1064,7 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 			} else if (job_ptr->batch_flag && job_ptr->details &&
 			           (job_ptr->details->no_requeue == 0)) {
 				uint16_t save_state;
+				srun_node_fail(job_ptr->job_id, node_name);
 				info("requeue job %u due to failure of node %s",
 				     job_ptr->job_id, node_name);
 				_set_job_prio(job_ptr);
@@ -2741,7 +2742,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate,
 	if (job_desc_msg->job_max_memory == NO_VAL)
 		job_desc_msg->job_max_memory = 1;  /* default 1MB mem per node */
 	if (job_desc_msg->job_min_tmp_disk == NO_VAL)
-		job_desc_msg->job_min_tmp_disk = 1;/* default 1MB disk per node */
+		job_desc_msg->job_min_tmp_disk = 0;/* default 0MB disk per node */
 
 	return SLURM_SUCCESS;
 }
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 7478bc606..5f07cff6f 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -4,7 +4,7 @@
  *	hash table (node_hash_table), time stamp (last_node_update) and 
  *	configuration list (config_list)
  *
- *  $Id: node_mgr.c 13068 2008-01-23 18:41:54Z jette $
+ *  $Id: node_mgr.c 13274 2008-02-14 21:32:07Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1919,7 +1919,8 @@ void node_not_resp (char *name, time_t msg_time)
 		error ("node_not_resp unable to find node %s", name);
 		return;
 	}
-	error("Node %s not responding", node_ptr->name);
+	if ((node_ptr->node_state & NODE_STATE_BASE) != NODE_STATE_DOWN)
+		error("Node %s not responding", node_ptr->name);
 	_node_not_resp(node_ptr, msg_time);
 #endif
 }
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 16f766136..b8903bc62 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -2,7 +2,7 @@
  *  node_scheduler.c - select and allocated nodes to jobs 
  *	Note: there is a global node table (node_record_table_ptr) 
  *
- *  $Id: node_scheduler.c 13061 2008-01-22 21:23:56Z da $
+ *  $Id: node_scheduler.c 13234 2008-02-08 22:13:39Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -300,18 +300,26 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap,
 		uint32_t req_nodes, bool test_only)
 {
 	bitstr_t *basemap;
-	int i, error_code = EINVAL, node_cnt = 0, prev_cnt = 0, set_cnt;
-
-	basemap = bit_copy(bitmap);
-	if (basemap == NULL)
-		fatal("bit_copy malloc failure");
+	int i, max_bit, error_code = EINVAL;
+	int  node_cnt = 0, prev_cnt = 0, set_cnt;
 
 	set_cnt = bit_set_count(bitmap);
 	if ((set_cnt < min_nodes) ||
 	    ((req_nodes > min_nodes) && (set_cnt < req_nodes)))
 		return error_code;	/* not usable */
 
+	basemap = bit_copy(bitmap);
+	if (basemap == NULL)
+		fatal("bit_copy malloc failure");
+
+	max_bit = bit_size(bitmap) - 1;
 	for (i=0; node_cnt<set_cnt; i++) {
+		/* if req_nodes, then start with those as a baseline */
+		if (job_ptr->details && job_ptr->details->req_node_bitmap) {
+			bit_copybits(bitmap, job_ptr->details->req_node_bitmap);
+		} else {
+			bit_nclear(bitmap, 0, max_bit);
+		}
 		node_cnt = _job_count_bitmap(basemap, bitmap, i);
 		if ((node_cnt == 0) || (node_cnt == prev_cnt))
 			continue;	/* nothing new to test */
@@ -332,7 +340,7 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap,
 
 /*
  * Set the bits in 'jobmap' that correspond to bits in the 'bitmap'
- * that are running 'job_cnt' jobs or less, and clear the rest.
+ * that are running 'job_cnt' jobs or less.
  */
 static int
 _job_count_bitmap(bitstr_t * bitmap, bitstr_t * jobmap, int job_cnt) 
@@ -345,8 +353,6 @@ _job_count_bitmap(bitstr_t * bitmap, bitstr_t * jobmap, int job_cnt)
 		    (node_record_table_ptr[i].run_job_cnt <= job_cnt)) {
 			bit_set(jobmap, i);
 			count++;
-		} else {
-			bit_clear(jobmap, i);
 		}
 	}
 	return count;
diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c
index 62ab07a80..4d31c895a 100644
--- a/src/slurmctld/partition_mgr.c
+++ b/src/slurmctld/partition_mgr.c
@@ -2,7 +2,7 @@
  *  partition_mgr.c - manage the partition information of slurm
  *	Note: there is a global partition list (part_list) and
  *	time stamp (last_part_update)
- *  $Id: partition_mgr.c 12452 2007-10-05 19:07:07Z da $
+ *  $Id: partition_mgr.c 13279 2008-02-15 00:14:16Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -835,12 +835,14 @@ int update_part(update_part_msg_t * part_desc)
 		xfree(part_ptr->allow_uids);
 		if ((strcasecmp(part_desc->allow_groups, "ALL") == 0) ||
 		    (part_desc->allow_groups[0] == '\0')) {
-			info("update_part: setting allow_groups to ALL for partition %s", 
+			info("update_part: setting allow_groups to ALL for "
+				"partition %s", 
 				part_desc->name);
 		} else {
 			part_ptr->allow_groups = part_desc->allow_groups;
 			part_desc->allow_groups = NULL;
-			info("update_part: setting allow_groups to %s for partition %s", 
+			info("update_part: setting allow_groups to %s for "
+				"partition %s", 
 				part_ptr->allow_groups, part_desc->name);
 			part_ptr->allow_uids =
 				_get_groups_members(part_ptr->allow_groups);
@@ -852,8 +854,14 @@ int update_part(update_part_msg_t * part_desc)
 
 		if (part_desc->nodes[0] == '\0')
 			part_ptr->nodes = NULL;	/* avoid empty string */
-		else
+		else {	/* private copy; whitespace separators -> commas */
+			int i;	/* isspace() needs unsigned char values */
+			part_ptr->nodes = xstrdup(part_desc->nodes);
+			for (i=0; part_ptr->nodes[i]; i++) {
+				if (isspace((unsigned char)part_ptr->nodes[i]))
+					part_ptr->nodes[i] = ',';
+			}
+		}
 
 		error_code = _build_part_bitmap(part_ptr);
 		if (error_code) {
@@ -861,7 +869,7 @@ int update_part(update_part_msg_t * part_desc)
 			part_ptr->nodes = backup_node_list;
 		} else {
 			info("update_part: setting nodes to %s for partition %s", 
-			     part_desc->nodes, part_desc->name);
+			     part_ptr->nodes, part_desc->name);
 			xfree(backup_node_list);
 		}
 	}
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index f5768a3fb..e222d1e86 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  proc_req.c - process incomming messages to slurmctld
  *
- *  $Id: proc_req.c 12413 2007-09-26 17:32:47Z jette $
+ *  $Id: proc_req.c 13237 2008-02-08 23:16:16Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -324,6 +324,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->job_credential_public_certificate = xstrdup(conf->
 					job_credential_public_certificate);
 	conf_ptr->job_file_append     = conf->job_file_append;
+	conf_ptr->get_env_timeout     = conf->get_env_timeout;
 	conf_ptr->kill_wait           = conf->kill_wait;
 	conf_ptr->mail_prog           = xstrdup(conf->mail_prog);
 	conf_ptr->max_job_cnt         = conf->max_job_cnt;
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index 43aa77897..2da3cfdc5 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  step_mgr.c - manage the job step information of slurm
- *  $Id: step_mgr.c 12681 2007-11-26 18:56:25Z jette $
+ *  $Id: step_mgr.c 13155 2008-02-01 17:30:43Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -701,8 +701,16 @@ step_create(job_step_create_request_msg_t *step_specs,
 	if (job_ptr == NULL)
 		return ESLURM_INVALID_JOB_ID ;
 
-	if ((job_ptr->job_state == JOB_SUSPENDED) || IS_JOB_PENDING(job_ptr))
+	if (job_ptr->job_state == JOB_SUSPENDED)
 		return ESLURM_DISABLED;
+	if (IS_JOB_PENDING(job_ptr)) {
+		/* NOTE: LSF creates a job allocation for batch jobs.
+		 * After the allocation has been made, LSF submits a
+		 * job to run in that allocation (sbatch --jobid= ...).
+		 * If that job is pending either LSF messed up or LSF is
+		 * not being used. We have seen this problem with Moab. */
+		return ESLURM_DUPLICATE_JOB_ID;
+	}
 
 	if (batch_step) {
 		info("user %u attempting to run batch script within "
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index 0091fadc2..6ac03c22d 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd
- *  $Id: mgr.c 12647 2007-11-12 17:09:47Z da $
+ *  $Id: mgr.c 13229 2008-02-08 01:02:06Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -280,7 +280,7 @@ mgr_launch_batch_job_setup(batch_job_launch_msg_t *msg, slurm_addr *cli)
 	}
 	
 	/* this is the new way of setting environment variables */
-	env_array_for_batch_job(&job->env, msg);
+	env_array_for_batch_job(&job->env, msg, conf->node_name);
 
 	/* this is the old way of setting environment variables */
 	job->envtp->nprocs = msg->nprocs;
diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c
index fdbb327e2..aa5294f71 100644
--- a/src/smap/configure_functions.c
+++ b/src/smap/configure_functions.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  configure_functions.c - Functions related to configure mode of smap.
- *  $Id: configure_functions.c 11985 2007-08-09 23:07:08Z da $
+ *  $Id: configure_functions.c 13270 2008-02-14 19:40:44Z da $
  *****************************************************************************
  *  Copyright (C) 2002 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -213,7 +213,8 @@ static int _create_allocation(char *com, List allocated_blocks)
 	request->nodecards = 0;
 	request->quarters = 0;
 	request->passthrough = false;
-	
+	request->avail_node_bitmap = NULL;
+
 	while(i<len) {				
 		if(!strncasecmp(com+i, "mesh", 4)) {
 			request->conn_type=SELECT_MESH;
@@ -855,7 +856,8 @@ static int _copy_allocation(char *com, List allocated_blocks)
 		request->rotate_count= 0;
 		request->elongate_count = 0;
 	       	request->elongate_geos = list_create(NULL);
-	
+		request->avail_node_bitmap = NULL;
+
 		results_i = list_iterator_create(request->elongate_geos);
 		while ((geo_ptr = list_next(results_i)) != NULL) {
 			geo = xmalloc(sizeof(int)*3);
@@ -1098,6 +1100,7 @@ static int _add_bg_record(blockreq_t *blockreq, List allocated_blocks)
 			bp_count++;
 			if(nodes[j] != ',')
 				break;
+			j--;
 		}
 		j++;
 	}
diff --git a/src/smap/partition_functions.c b/src/smap/partition_functions.c
index 2dd270b00..9e8c6743b 100644
--- a/src/smap/partition_functions.c
+++ b/src/smap/partition_functions.c
@@ -425,6 +425,7 @@ static int _marknodes(db2_block_info_t *block_ptr, int count)
 							0);
 			if(block_ptr->nodes[j] != ',')
 				break;
+			j--;
 		}
 		j++;
 	}
@@ -914,6 +915,7 @@ static int _make_nodelist(char *nodes, List nodelist)
 			_addto_nodelist(nodelist, start, start);
 			if(nodes[j] != ',')
 				break;
+			j--;
 		}
 		j++;
 	}
diff --git a/src/srun/allocate.c b/src/srun/allocate.c
index a06ea9596..b1abd672a 100644
--- a/src/srun/allocate.c
+++ b/src/srun/allocate.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  * src/srun/allocate.c - srun functions for managing node allocations
- * $Id: allocate.c 12700 2007-11-27 23:39:24Z jette $
+ * $Id: allocate.c 13231 2008-02-08 17:16:47Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -561,12 +561,12 @@ job_desc_msg_create_from_opts (char *script)
 			struct passwd *pw = NULL;
 			pw = getpwuid(opt.uid);
 			if (pw != NULL) {
-				j->environment =
-					env_array_user_default(pw->pw_name,
+				j->environment = env_array_user_default(
+							pw->pw_name,
 							opt.get_user_env_time,
 							opt.get_user_env_mode);
-				/* FIXME - should we abort if j->environment
-				   is NULL? */
+				if (j->environment == NULL)
+					exit(1);    /* error already logged */
 			}
 		}
 		env_array_merge(&j->environment, (const char **)environ);
diff --git a/src/srun/opt.c b/src/srun/opt.c
index fca5cf8ea..4802c930b 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  opt.c - options processing for srun
- *  $Id: opt.c 12856 2007-12-19 00:18:44Z jette $
+ *  $Id: opt.c 13270 2008-02-14 19:40:44Z da $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1721,6 +1721,9 @@ void set_options(const int argc, char **argv, int first)
 #ifdef HAVE_BG
 			info("\tThe nodelist option should only be used if\n"
 			     "\tthe block you are asking for can be created.\n"
+			     "\tIt should also include all the midplanes you\n"
+			     "\twant to use, partial lists may not\n"
+			     "\twork correctly.\n"
 			     "\tPlease consult smap before using this option\n"
 			     "\tor your job may be stuck with no way to run.");
 #endif
@@ -2168,7 +2171,7 @@ static void _opt_args(int argc, char **argv)
 		char *fullpath;
 		char *cmd       = remote_argv[0];
 		bool search_cwd = (opt.batch || opt.allocate);
-		int  mode       = (search_cwd) ? R_OK : R_OK | X_OK;
+		int  mode       = (search_cwd) ? R_OK : X_OK;
 
 		if ((fullpath = _search_path(cmd, search_cwd, mode))) {
 			xfree(remote_argv[0]);
diff --git a/src/sview/part_info.c b/src/sview/part_info.c
index 48c274ea1..bf53ddb10 100644
--- a/src/sview/part_info.c
+++ b/src/sview/part_info.c
@@ -1390,6 +1390,7 @@ static List _create_part_info_list(partition_info_msg_t *part_info_ptr,
 	ListIterator itr = NULL;
 	hostlist_t hl;
 #ifdef HAVE_BG
+	int j;
 	bg_info_record_t *bg_info_record = NULL;
 	int node_scaling = part_info_ptr->partition_array[0].node_scaling;
 	char *slurm_user = NULL;
@@ -1451,9 +1452,9 @@ static List _create_part_info_list(partition_info_msg_t *part_info_ptr,
 			node_ptr = _find_node(node_name, node_info_ptr);
 			free(node_name);
 #ifdef HAVE_BG
-			for(i=0; i<3; i++) {
+			for(j=0; j<3; j++) {
 				int norm = 0;
-				switch(i) {
+				switch(j) {
 				case SVIEW_BG_IDLE_STATE:
 					/* get the idle node count if
 					 * we don't have any error or
-- 
GitLab