From c7ad38f7601f4ad08dbe0129c8adca3321007050 Mon Sep 17 00:00:00 2001
From: Mehdi Dogguy <mehdi@debian.org>
Date: Mon, 8 Sep 2014 21:30:34 +0200
Subject: [PATCH] Imported Upstream version 1.2.17

---
 BUILD.NOTES                                   |  20 +-
 COPYING                                       |   3 +-
 META                                          |   4 +-
 NEWS                                          |  41 +-
 contribs/mpich1.slurm.patch                   |  53 ++
 doc/html/maui.shtml                           |  13 +-
 doc/man/man1/salloc.1                         |  19 +-
 doc/man/man1/sbatch.1                         |   8 +
 doc/man/man1/srun.1                           |  36 +-
 doc/man/man5/wiki.conf.5                      |  27 +-
 slurm.spec                                    |  12 +-
 src/api/job_info.c                            |   8 +-
 src/api/pmi.c                                 |  49 +-
 src/api/pmi_server.c                          |  46 +-
 src/api/slurm_pmi.c                           | 113 +++-
 src/common/env.c                              |   4 +-
 src/common/hostlist.c                         |  26 +-
 src/common/read_config.c                      |   4 +-
 src/common/slurm_protocol_api.c               |   8 +-
 src/common/timers.c                           |   2 +-
 .../jobacct/common/common_slurmstepd.c        |   2 +-
 src/plugins/mpi/mvapich/mvapich.c             |  35 +-
 src/plugins/sched/backfill/backfill.c         |   8 +-
 src/plugins/sched/wiki/msg.c                  |  25 +
 src/plugins/sched/wiki/msg.h                  |   3 +
 src/plugins/sched/wiki/sched_wiki.c           |  21 +-
 src/plugins/sched/wiki2/hostlist.c            |  43 +-
 src/plugins/sched/wiki2/start_job.c           |   1 -
 .../bluegene/block_allocator/Makefile.am      |  13 +-
 .../bluegene/block_allocator/Makefile.in      |  12 +-
 .../block_allocator/block_allocator.c         | 569 +++++++++---------
 .../block_allocator/block_allocator.h         |   2 +-
 src/plugins/select/cons_res/select_cons_res.c |   4 +-
 src/salloc/opt.c                              |  16 +-
 src/salloc/opt.h                              |   3 +-
 src/salloc/salloc.c                           |  12 +-
 src/slurmctld/agent.c                         |  18 +-
 src/slurmctld/job_mgr.c                       |  41 +-
 src/slurmd/slurmstepd/mgr.c                   |  32 +-
 src/slurmd/slurmstepd/task.c                  |   6 +-
 src/squeue/print.h                            |   4 +-
 src/srun/opt.c                                |  12 +-
 src/srun/srun.c                               |   8 +
 testsuite/expect/Makefile.am                  |   1 +
 testsuite/expect/Makefile.in                  |   1 +
 testsuite/expect/README                       |  65 +-
 testsuite/expect/globals                      |   6 +
 testsuite/expect/regression                   |   6 +-
 testsuite/expect/test1.52                     |   9 +-
 testsuite/expect/test1.88                     |   4 +
 testsuite/expect/test1.91                     |  20 +-
 testsuite/expect/test1.92                     |  14 +-
 testsuite/expect/test11.5                     |   7 +
 testsuite/expect/test14.7                     |   5 +
 testsuite/expect/test15.24                    |  97 +++
 testsuite/expect/test17.32                    |   7 +-
 testsuite/expect/test18.17                    |   2 +-
 testsuite/expect/test7.5                      |  20 +-
 testsuite/expect/test7.6                      |  18 +-
 59 files changed, 1139 insertions(+), 529 deletions(-)
 mode change 100644 => 100755 testsuite/expect/test1.91
 mode change 100644 => 100755 testsuite/expect/test1.92
 create mode 100755 testsuite/expect/test15.24

diff --git a/BUILD.NOTES b/BUILD.NOTES
index 39dd9c95c..2958181f1 100644
--- a/BUILD.NOTES
+++ b/BUILD.NOTES
@@ -33,7 +33,8 @@ Linux cluster (See BlueGene and AIX specific notes below for some differences).
    svn copy https://eris.llnl.gov/svn/slurm/trunk \
      https://eris.llnl.gov/svn/slurm/tags/slurm-1-2-0-0-pre3 \
      -m "description"
-3. Use the rpm make target to create the new RPMs. This requires a .rpmmacros (.rpmrc for newer versions of rpmbuild) file containing:
+3. Use the rpm make target to create the new RPMs. This requires a .rpmmacros 
+   (.rpmrc for newer versions of rpmbuild) file containing:
 	%_slurm_sysconfdir      /etc/slurm
 	%_enable_debug    "--enable-debug"
    I usually build with using the following syntax:
@@ -68,11 +69,12 @@ BlueGene build notes:
 To build and run on AIX:
 0. svn co https://eris.llnl.gov/svn/slurm/trunk slurm
    svn co https://eris.llnl.gov/svn/buildfarm/trunk buildfarm
-   put the buildfarm directory in your search path
-   Also, you will need two commands to appear FIRST in your PATH:
+   Put the buildfarm directory in your search path
+   Also, you will need several commands to appear FIRST in your PATH:
 
       /usr/local/tools/gnu/aix_5_64_fed/bin/install
       /usr/local/gnu/bin/tar
+      /usr/bin/gcc
 
    I do this by making symlinks to those commands in the buildfarm directory,
    then making the buildfarm directory the first one in my PATH.
@@ -85,7 +87,7 @@ To build and run on AIX:
    make
    make uninstall  # remove old shared libraries, aix caches them
    make install
-3. To build RPMs (NOTE: Many GNU tools are required):
+3. To build RPMs (NOTE: GNU tools early in PATH as described above in #0):
    Create a file specifying system specific files:
 	#
 	# RPM Macros for use with SLURM on AIX
@@ -95,10 +97,10 @@ To build and run on AIX:
 	%_prefix                /opt/freeware
 	%_slurm_sysconfdir      %{_prefix}/etc/slurm
         %_defaultdocdir         %{_prefix}/doc
-
-	%_enable_debug    "--enable-debug"
-	%with_ssl         "--with-ssl=/opt/freeware"
-	%with_munge       "--with-munge=/opt/freeware"
+	%_enable_debug          "--enable-debug"
+	%with_ssl               "--with-ssl=/opt/freeware"
+	%with_munge             "--with-munge=/opt/freeware"
+	%with_proctrack         "--with-proctrack=/admin/llnl/include"
    Log in to the machine "uP".  uP is currently the lowest-common-denominator
      AIX machine.
    CC=/usr/bin/gcc build -s https://eris.llnl.gov/svn/slurm/tags/slurm-1-2-0-0-pre3
@@ -113,7 +115,7 @@ To build and run on AIX:
    version.
 8. Install the rpms slurm-*.ppc.rpm, slurm-aix-federation-*.ppc.rpm,
    slurm-auth-munge-*.ppc.rpm and slurm-devel-*.ppc.rpm in 
-   /usr/admin/inst.image/slurm/aix5.3 on an OCF AIX machine 
+   /usr/admin/inst.images/slurm/aix5.3 on an OCF AIX machine 
    (pdev is a good choice).
 
 AIX/Federation switch window problems
diff --git a/COPYING b/COPYING
index 4938ad933..55269c8e3 100644
--- a/COPYING
+++ b/COPYING
@@ -1,5 +1,6 @@
 All SLURM code and documentation is available under the GNU General Public 
-License. 
+License. Some tools in the "contribs" directory have other licenses. See 
+the documentation for individual contributed tools for details. 
 
 In addition, as a special exception, the copyright holders give permission 
 to link the code of portions of this program with the OpenSSL library under 
diff --git a/META b/META
index ae7b593ee..fff8ca7d7 100644
--- a/META
+++ b/META
@@ -3,8 +3,8 @@
   Api_revision:  0
   Major:         1
   Meta:          1
-  Micro:         15
+  Micro:         17
   Minor:         2
   Name:          slurm
   Release:       1
-  Version:       1.2.15
+  Version:       1.2.17
diff --git a/NEWS b/NEWS
index 3e21db491..0e4caf4b0 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,45 @@
 This file describes changes in recent versions of SLURM. It primarily
 documents those changes that are of interest to users and admins.
 
+* Changes in SLURM 1.2.18
+=========================
+
+* Changes in SLURM 1.2.17
+=========================
+ -- In select/cons_res properly release resources allocated to job being 
+    suspended (rmbreak.patch, from Chris Holmes, HP).
+ -- Fix AIX linking problem for PMI (mpich2) support.
+ -- Improve PMI logic for greater scalability (up to 16k tasks run).
+ -- Add srun support for SLURM_THREADS and PMI_FANOUT environment variables.
+ -- Fix support in squeue for output format with left justification of 
+    reason (%r) and reason/node_list (%R) output.
+ -- Automatically requeue a batch job when a node allocated to it fails
+    or the prolog fails (unless --no-requeue or --no-kill option used).
+ -- In sched/wiki, enable use of wiki.conf parameter ExcludePartitions to
+    directly schedule selected partitions without Maui control.
+ -- In sched/backfill, if a job requires specific nodes, schedule other jobs
+    ahead of it rather than completely stopping backfill scheduling for that
+    partition.
+ -- BLUEGENE - corrected logic making block allocation work in a circular 
+    fashion instead of linear.
+ 
+* Changes in SLURM 1.2.16
+=========================
+ -- Add --overcommit option to the salloc command.
+ -- Run task epilog from job's working directory rather than directory
+    where slurmd daemon started from.
+ -- Log errors running task prolog or task epilog to srun's output.
+ -- In sched/wiki2, fix bug processing condensed hostlist expressions.
+ -- Release contribs/mpich1.slurm.patch without GPL license. 
+ -- Fix bug in mvapich plugin for read/write calls that return EAGAIN.
+ -- Don't start MVAPICH timeout logic until we know that srun is starting 
+    an MVAPICH program.
+ -- Fix to srun only allocating number of nodes needed for requested task
+    count when combining allocation and step creation in srun.
+ -- Execute task-prolog within proctrack container to insure that all 
+    child processes get terminated.
+ -- Fixed job accounting to work with sgi_job proctrack plugin.
+
 * Changes in SLURM 1.2.15
 =========================
  -- In sched/wiki2, fix bug processing hostlist expressions where hosts
@@ -2601,4 +2640,4 @@ documents those changes that are of interest to users and admins.
  -- Change directory to /tmp in slurmd if daemonizing.
  -- Logfiles are reopened on reconfigure.
  
-$Id: NEWS 12190 2007-08-31 19:05:18Z jette $
+$Id: NEWS 12383 2007-09-21 21:15:45Z da $
diff --git a/contribs/mpich1.slurm.patch b/contribs/mpich1.slurm.patch
index 738fb9eda..70990087a 100644
--- a/contribs/mpich1.slurm.patch
+++ b/contribs/mpich1.slurm.patch
@@ -1,3 +1,56 @@
+This work was produced at the University of California, Lawrence Livermore 
+National Laboratory (UC LLNL) under contract no. W-7405-ENG-48 (Contract 48)
+between the U.S. Department of Energy (DOE) and The Regents of the University
+of California (University) for the operation of UC LLNL. The rights of the 
+Federal Government are reserved under Contract 48 subject to the restrictions
+agreed upon by the DOE and University as allowed under DOE Acquisition 
+Letter 97-1.
+
+
+DISCLAIMER
+
+This work was prepared as an account of work sponsored by an agency of the
+United States Government. Neither the United States Government nor the 
+University of California nor any of their employees, makes any warranty, 
+express or implied, or assumes any liability or responsibility for the 
+accuracy, completeness, or usefulness of any information, apparatus, product, 
+or process disclosed, or represented that its use would not infringe 
+privately-owned rights. Reference herein to any specific commercial products, 
+process, or service by trade name, trademark, manufacturer or otherwise does 
+not necessarily constitute or imply its endorsement, recommendation, or
+favoring by the United States Government or the University of California. 
+The views and opinions of authors expressed herein do not necessarily state
+or reflect those of the United States Government or the University of 
+California, and shall not be used for advertising or product endorsement 
+purposes.
+
+
+NOTIFICATION OF COMMERCIAL USE
+
+Commercialization of this product is prohibited without notifying the 
+Department of Energy (DOE) or Lawrence Livermore National Laboratory (LLNL).
+
+
+USE OF THIS PATCH
+
+This patch makes use of SLURM's srun command to launch all tasks.
+IMPORTANT: In order to launch more than one task per node, shared
+memory is used for communications. You must explicitly enable shared
+memory when building MPICH with the following configure line:
+  ./configure --with-device=ch_p4 --with-comm=shared
+
+Applications must be rebuilt with this new library to function 
+with SLURM launch. The "--mpi=mpich1_p4" srun option MUST be 
+used to launch the tasks (it sets a bunch of environment variables 
+and launches only one task per node, the MPICH library launches 
+the other tasks on the node). Here is a sample execute line:  
+   srun --mpi=mpich1_p4 [srun_options...] <progname> [options...]
+
+
+IDENTIFICATION: UCRL-CODE-234229
+
+
+
 Index: mpid/ch_p4/p4/lib/p4_args.c
 ===================================================================
 --- mpid/ch_p4/p4/lib/p4_args.c	(revision 11616)
diff --git a/doc/html/maui.shtml b/doc/html/maui.shtml
index 3331360df..a76aef472 100644
--- a/doc/html/maui.shtml
+++ b/doc/html/maui.shtml
@@ -108,9 +108,11 @@ includes a description of keywords presently only
 supported by the sched/wiki2 plugin for use with the 
 Moab Scheduler.</p>
 
-<p>They only wiki.conf keyword used by the sched/wiki plugin
-is <b>AuthKey</b>, which should match the key used to configure 
-Maui at build time.
+<p>Only two wiki.conf parameters are used by the sched/wiki plugin:
+<b>AuthKey</b> should match the key used to configure 
+Maui at build time and
+<b>ExcludePartitions</b> can be used for SLURM to directly 
+schedule jobs in select partitions without Maui control.
 Note that SLURM's wiki plugin does not include a mechanism
 to submit new jobs, so even without this key nobody could
 run jobs as another user.
@@ -130,11 +132,14 @@ configuration.</p>
 #
 # Matches Maui's --with-key configuration parameter
 AuthKey=42
+#
+# SLURM to directly schedule "debug" partition
+ExcludePartitions=debug
 </pre>
 </p>
 
 <p class="footer"><a href="#top">top</a></p>
 
-<p style="text-align:center;">Last modified 21 August 2007</p>
+<p style="text-align:center;">Last modified 17 September 2007</p>
 
 <!--#include virtual="footer.txt"-->
diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1
index 0d004adac..c4a6925ad 100644
--- a/doc/man/man1/salloc.1
+++ b/doc/man/man1/salloc.1
@@ -1,4 +1,4 @@
-.TH "salloc" "1" "SLURM 1.2" "December 2006" "SLURM Commands"
+.TH "salloc" "1" "SLURM 1.2" "August 2007" "SLURM Commands"
 .SH "NAME"
 .LP 
 salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished.
@@ -200,6 +200,13 @@ The default value of this option is one node, but other options implicitly
 increase the default node count. 
 The job will be allocated as many nodes as possible within the range specified 
 and without delaying the initiation of the job.
+The partition's node limits supersede those of the job. 
+If a job's node limits are outside of the range permitted for its 
+associated partition, the job will be left in a PENDING state. 
+This permits possible execution at a later time, when the partition 
+limit is changed.
+If a job node limit exceeds the number of nodes configured in the 
+partition, the job will be rejected.
 
 .TP
 \fB\-n\fR, \fB\-\-tasks\fR[=]<\fInumber\fR>
@@ -222,6 +229,13 @@ is presently ignored if SchedulerType=sched/maui.
 \fB\-\-no\-bell\fR
 Silence salloc's use of the terminal bell. Also see the option \fB\-\-bell\fR.
 
+.TP
+\fB\-O\fR, \fB\-\-overcommit\fR
+Overcommit resources. Normally, \fBsbatch\fR will allocate one cpu per
+task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly
+allowing more than one process per cpu. However no more than
+\fBMAX_TASKS_PER_NODE\fR tasks are permitted to execute per node.
+
 .TP 
 \fB\-p\fR, \fB\-\-partition\fR[=]<\fIpartition name\fR>
 Request a specific partition for the resource allocation.  If not specified,
@@ -376,6 +390,9 @@ Same as \fB\-\-no\-bell\fR.
 \fBSALLOC_NO_ROTATE\fR
 Same as \fB\-R\fR or \fB\-\-no\-rotate\fR.
 .TP
+\fBSLURM_OVERCOMMIT\fR
+Same as \fB\-O, \-\-overcommit\fR
+.TP
 \fBSALLOC_PARTITION\fR
 Same as \fB\-p\fR or \fB\-\-partition\fR.
 .TP
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index 5ee2c5617..86d736f8c 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -241,6 +241,13 @@ default value of this option is one node, but other command line options
 may implicitly set the default node count to a higher value.
 The job will be allocated as many nodes as possible within the range specified 
 and without delaying the initiation of the job.
+The partition's node limits supersede those of the job. 
+If a job's node limits are outside of the range permitted for its 
+associated partition, the job will be left in a PENDING state. 
+This permits possible execution at a later time, when the partition 
+limit is changed.
+If a job node limit exceeds the number of nodes configured in the 
+partition, the job will be rejected.
 
 .TP
 \fB\-n\fR, \fB\-\-tasks\fR[=]<\fInumber\fR>
@@ -271,6 +278,7 @@ When a job is requeued, the batch script is initiated from its beginning.
 Specify the number of tasks to be launched per node.
 Equivalent to \fB\-\-tasks\-per\-node\fR.
 
+.TP
 \fB\-O\fR, \fB\-\-overcommit\fR
 Overcommit resources. Normally, \fBsbatch\fR will allocate one cpu per
 task to be executed. By specifying \fB\-\-overcommit\fR you are explicitly
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index f534c2eeb..4b1a28425 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1,4 +1,4 @@
-\." $Id: srun.1 12131 2007-08-24 00:06:21Z jette $
+\." $Id: srun.1 12315 2007-09-13 23:56:02Z jette $
 .\"
 .TH SRUN "1" "July 2007" "srun 1.2" "slurm components"
 
@@ -584,10 +584,15 @@ The scheduler may decide to launch the job on more than \fIminnodes\fR nodes.
 A limit on the maximum node count may be specified with \fImaxnodes\fR
 (e.g. "\-\-nodes=2\-4").  The minimum and maximum node count may be the
 same to specify a specific number of nodes (e.g. "\-\-nodes=2\-2" will ask
-for two and ONLY two nodes).  The partition's node 
-limits supersede those of the job. If a job's node limits are completely 
-outside of the range permitted for its associated partition, the job 
-will be left in a PENDING state. Note that the environment 
+for two and ONLY two nodes).  
+The partition's node limits supersede those of the job. 
+If a job's node limits are outside of the range permitted for its 
+associated partition, the job will be left in a PENDING state. 
+This permits possible execution at a later time, when the partition 
+limit is changed.
+If a job node limit exceeds the number of nodes configured in the 
+partition, the job will be rejected.
+Note that the environment 
 variable \fBSLURM_NNODES\fR will be set to the count of nodes actually 
 allocated to the job. See the \fBENVIRONMENT VARIABLES \fR section 
 for more information.  If \fB\-N\fR is not specified, the default
@@ -1164,12 +1169,24 @@ These environment variables, along with their corresponding options,
 are listed below.
 Note: Command line options will always override these settings.
 .TP 22
+\fBPMI_FANOUT\fR
+This is used exclusively with PMI (MPICH2 and MVAPICH2) and
+controls the fanout of data communications. The srun command
+sends messages to application programs (via the PMI library)
+and those applications may be called upon to forward that 
+data to up to this number of additional tasks. Higher values
+offload work from the srun command to the applications and 
+likely increase the vulnerability to failures. 
+The default value is 32.
+.TP
 \fBPMI_TIME\fR
 This is used exclusively with PMI (MPICH2 and MVAPICH2) and 
-controls how the much the communications from the tasks to the 
+controls how much the communications from the tasks to the 
 srun are spread out in time in order to avoid overwhelming the 
 srun command with work. The default value is 500 (microseconds)
-per task. On relatively slow processors, higher values may be required.
+per task. On relatively slow processors or systems with very 
+large processor counts (and large PMI data sets), higher values 
+may be required.
 .TP
 \fBSLURM_CONF\fR
 The location of the SLURM configuration file.
@@ -1279,11 +1296,14 @@ Same as \fB\-\-task\-epilog\fR=\fIexecutable\fR
 \fBSLURM_TASK_PROLOG\fR
 Same as \fB\-\-task\-prolog\fR=\fIexecutable\fR
 .TP
+\fBSLURM_THREADS\fR
+Same as \fB\-T, \-\-threads\fR
+.TP
 \fBSLURM_TIMELIMIT\fR
 Same as \fB\-t, \-\-time\fR=\fIminutes\fR
 .TP
 \fBSLURM_UNBUFFEREDIO\fR
-Same as \fB-u, --unbuffered\fR
+Same as \fB\-u, \-\-unbuffered\fR
 .TP
 \fBSLURM_WAIT\fR
 Same as \fB\-W, \-\-wait\fR=\fIseconds\fR
diff --git a/doc/man/man5/wiki.conf.5 b/doc/man/man5/wiki.conf.5
index 5b6fce9dd..36cae1ca0 100644
--- a/doc/man/man5/wiki.conf.5
+++ b/doc/man/man5/wiki.conf.5
@@ -40,7 +40,7 @@ Not applicable to wiki plugin, only the wiki2 plugin.
 
 .TP
 \fBEPort\fR
-Port to be used to notify Moab of events (job submitted to Slurm, 
+Port to be used to notify Moab of events (job submitted to SLURM, 
 job terminates, etc.). 
 This numeric value should match EPORT configured in the
 \fBmoab.cnf\fR file.
@@ -49,19 +49,26 @@ Not applicable to wiki plugin, only the wiki2 plugin.
 .TP
 \fBExcludePartitions\fR
 Identifies partitions whose jobs are to be scheduled directly 
-by SLURM rather than Moab. 
-This only effects jobs which are submitted using Slurm 
+by SLURM rather than Moab/Maui. 
+This only affects jobs which are submitted using SLURM 
 commands (i.e. srun, salloc or sbatch, NOT msub from Moab).
 These jobs will be scheduled on a First\-Come\-First\-Served 
-basis. 
-This may provide faster response times than Moab scheduling. 
-Moab will account for and report the jobs, but their initiation
-will be outside of Moab's control.
-Note that Moab controls for resource reservation, fair share 
+basis directly by SLURM. 
+Note that SLURM recognizes jobs submitted via msub based 
+upon the value \fBFirstJobId\fR configured in \fIslurm.conf\fR.
+Set the values \fBMINJOBID\fR and \fBMAXJOBID\fR in \fImoab.cfg\fR
+accordingly. 
+For example MINJOBID=1, MAXJOBID=65535 and FirstJobId=65536.
+Jobs submitted using msub will have job ID values in the range 
+of 1 and 65535 while jobs submitted directly using SLURM commands
+will have a job ID of 65536 or higher.
+Moab/Maui controls for resource reservation, fair share 
 scheduling, etc. will not apply to the initiation of these jobs.
+While Moab/Maui will not control the initiation of jobs in these 
+partitions, it will account for and report the jobs.
 If more than one partition is to be scheduled directly by
-Slurm, use a comma separator between their names.
-Not applicable to wiki plugin, only the wiki2 plugin.
+SLURM, use a comma separator between their names.
+This may provide faster response times than Moab/Maui scheduling. 
 
 .TP
 \fBHostFormat\fR
diff --git a/slurm.spec b/slurm.spec
index 3dc1059f4..4c494a9f2 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -1,16 +1,16 @@
-# $Id: slurm.spec 12088 2007-08-22 18:02:24Z jette $
+# $Id: slurm.spec 12371 2007-09-20 20:45:23Z jette $
 
 # Note that this package is not relocatable
 
 Name:    slurm
-Version: 1.2.15
+Version: 1.2.17
 Release: 1%{?dist}
 
 Summary: Simple Linux Utility for Resource Management
 
 License: GPL 
 Group: System Environment/Base
-Source: slurm-1.2.15.tar.bz2
+Source: slurm-1.2.17.tar.bz2
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}
 URL: http://www.llnl.gov/linux/slurm
 %ifos linux
@@ -142,7 +142,7 @@ SLURM process tracking plugin for SGI job containers.
 (See http://oss.sgi.com/projects/pagg).
 
 %prep
-%setup -n slurm-1.2.15
+%setup -n slurm-1.2.17
 
 %build
 %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \
@@ -381,9 +381,9 @@ if [ -x /sbin/ldconfig ]; then
         [ -x /sbin/chkconfig ] && /sbin/chkconfig --add slurm
     fi
 fi
-if [ ! -f ${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.conf ]; then
+if [ ! -f %{_sysconfdir}/slurm.conf ]; then
     echo "You need to build and install a slurm.conf file"
-    echo "Edit ${RPM_BUILD_ROOT}%{_sysconfdir}/slurm.conf.example and copy it to slurm.conf or"
+    echo "Edit %{_sysconfdir}/slurm.conf.example and copy it to slurm.conf or"
     echo "Build a new one using http://www.llnl.gov/linux/slurm/configurator.html"
 fi
 
diff --git a/src/api/job_info.c b/src/api/job_info.c
index 62c67414d..b3a70a6af 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  job_info.c - get/print the job state information of slurm
- *  $Id: job_info.c 11315 2007-04-06 01:42:39Z morrone $
+ *  $Id: job_info.c 12249 2007-09-11 00:48:52Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -128,7 +128,7 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
 	char tmp1[128], tmp2[128];
 	char tmp_line[128];
 	char *ionodes = NULL;
-	uint16_t term_sig = 0;
+	uint16_t exit_status = 0, term_sig = 0;
 	char *out = NULL;
 	
 #ifdef HAVE_BG
@@ -193,9 +193,11 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
 	xstrcat(out, tmp_line);
 	if (WIFSIGNALED(job_ptr->exit_code))
 		term_sig = WTERMSIG(job_ptr->exit_code);
+	else
+		exit_status = WEXITSTATUS(job_ptr->exit_code);
 	snprintf(tmp_line, sizeof(tmp_line),
 		"ExitCode=%u:%u", 
-		WEXITSTATUS(job_ptr->exit_code), term_sig);
+		exit_status, term_sig);
 	xstrcat(out, tmp_line);
 	if (one_liner)
 		xstrcat(out, " ");
diff --git a/src/api/pmi.c b/src/api/pmi.c
index 6188f5642..677426127 100644
--- a/src/api/pmi.c
+++ b/src/api/pmi.c
@@ -112,8 +112,11 @@ struct kvs_rec {
 	char **		kvs_values;
 };
 
+#define _DEBUG 0
+
 static void _del_kvs_rec( struct kvs_rec *kvs_ptr );
 static void _init_kvs( char kvsname[] );
+static void inline _kvs_dump(void);
 static int  _kvs_put( const char kvsname[], const char key[], 
 		const char value[], int local);
 static void _kvs_swap(struct kvs_rec *kvs_ptr, int inx1, int inx2);
@@ -834,7 +837,10 @@ static void _init_kvs( char kvsname[] )
 	i = kvs_rec_cnt;
 	kvs_rec_cnt++;
 	kvs_recs = realloc(kvs_recs, (sizeof(struct kvs_rec) * kvs_rec_cnt));
-	kvs_recs[i].kvs_name = strndup(kvsname, PMI_MAX_KVSNAME_LEN);
+	/* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */
+	kvs_recs[i].kvs_name = malloc(PMI_MAX_KVSNAME_LEN);
+	if (kvs_recs[i].kvs_name)
+		strncpy(kvs_recs[i].kvs_name, kvsname, PMI_MAX_KVSNAME_LEN);
 	kvs_recs[i].kvs_state = KVS_STATE_LOCAL;
 	kvs_recs[i].kvs_cnt = 0;
 	kvs_recs[i].kvs_inx = 0;
@@ -1077,13 +1083,16 @@ static int _kvs_put( const char kvsname[], const char key[], const char value[],
 				kvs_recs[i].kvs_key_states[j] = KVS_KEY_STATE_LOCAL;
 			/* else leave unchanged */
 			/* replace the existing value */
-			free(kvs_recs[i].kvs_values[j]);
-			kvs_recs[i].kvs_values[j] = 
-					strndup(value, PMI_MAX_VAL_LEN);
+			/* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */
+			if (kvs_recs[i].kvs_values[j] == NULL)
+				kvs_recs[i].kvs_values[j] = malloc(PMI_MAX_VAL_LEN);
 			if (kvs_recs[i].kvs_values[j] == NULL)
 				rc = PMI_FAIL;	/* malloc error */
-			else
+			else {
 				rc = PMI_SUCCESS;
+				strncpy(kvs_recs[i].kvs_values[j], value, 
+					PMI_MAX_VAL_LEN);
+			}
 			goto fini;
 		}
 		/* create new key */
@@ -1104,18 +1113,23 @@ static int _kvs_put( const char kvsname[], const char key[], const char value[],
 			kvs_recs[i].kvs_key_states[j] = KVS_KEY_STATE_LOCAL;
 		else
 			kvs_recs[i].kvs_key_states[j] = KVS_KEY_STATE_GLOBAL;
-		kvs_recs[i].kvs_values[j] = strndup(value, PMI_MAX_VAL_LEN);
-		kvs_recs[i].kvs_keys[j]   = strndup(key, PMI_MAX_KEY_LEN);
+		/* DO NOT CHANGE TO STRNDUP(), NOT SUPPORTED ON AIX */
+		kvs_recs[i].kvs_values[j] = malloc(PMI_MAX_VAL_LEN);
+		kvs_recs[i].kvs_keys[j]   = malloc(PMI_MAX_KEY_LEN);
 		if ((kvs_recs[i].kvs_values[j] == NULL)
 		||  (kvs_recs[i].kvs_keys[j] == NULL))
 			rc = PMI_FAIL;	/* malloc error */
-		else
+		else {
 			rc = PMI_SUCCESS;
+			strncpy(kvs_recs[i].kvs_values[j], value, PMI_MAX_VAL_LEN);
+			strncpy(kvs_recs[i].kvs_keys[j],   key,   PMI_MAX_KEY_LEN);
+		}
 		goto fini;
 	}
 	rc = PMI_ERR_INVALID_KVS;
 
 fini:	pthread_mutex_unlock(&kvs_mutex);
+	_kvs_dump();
 	return rc;
 }
 
@@ -1811,3 +1825,22 @@ static int IsPmiKey(char * key) {
 	/* add code to test special key if needed */
 	return 0;
 }
+
+static void inline _kvs_dump(void)
+{
+#if _DEBUG
+	int i, j;
+
+	for (i=0; i<kvs_rec_cnt; i++) {
+		info("name=%s state=%u cnt=%u inx=%u",
+			kvs_recs[i].kvs_name, kvs_recs[i].kvs_state,
+			kvs_recs[i].kvs_cnt, kvs_recs[i].kvs_inx);
+		for (j=0; j<kvs_recs[i].kvs_cnt; j++) {
+			info("  state=%u key=%s value=%s",
+				kvs_recs[i].kvs_key_states[j],
+				kvs_recs[i].kvs_keys[j],
+				kvs_recs[i].kvs_values[j]);
+		}
+	}
+#endif
+}
diff --git a/src/api/pmi_server.c b/src/api/pmi_server.c
index 8898e0030..d03855089 100644
--- a/src/api/pmi_server.c
+++ b/src/api/pmi_server.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  pmi.c - Global PMI data as maintained within srun
- *  $Id: pmi_server.c 12083 2007-08-22 16:19:14Z jette $
+ *  $Id: pmi_server.c 12315 2007-09-13 23:56:02Z jette $
  *****************************************************************************
  *  Copyright (C) 2005-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -30,6 +30,7 @@
 #endif
 
 #include <pthread.h>
+#include <stdlib.h>
 #include <slurm/slurm_errno.h>
 
 #include "src/api/slurm_pmi.h"
@@ -42,7 +43,7 @@
 #include "src/common/xmalloc.h"
 
 #define _DEBUG           0	/* non-zero for extra KVS logging */
-#define PMI_FANOUT      32	/* max fanout for PMI msg forwarding */
+#define _DEBUG_TIMING    0	/* non-zero for KVS timing details */
 
 static pthread_mutex_t kvs_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int kvs_comm_cnt = 0;
@@ -179,12 +180,20 @@ static void *_agent(void *x)
 	struct kvs_comm_set *kvs_set;
 	struct msg_arg *msg_args;
 	struct kvs_hosts *kvs_host_list;
-	int i, j, kvs_set_cnt = 0, host_cnt;
+	int i, j, kvs_set_cnt = 0, host_cnt, pmi_fanout = 32;
 	int msg_sent = 0, max_forward = 0;
+	char *tmp;
 	pthread_t msg_id;
 	pthread_attr_t attr;
 	DEF_TIMERS;
 
+	tmp = getenv("PMI_FANOUT");
+	if (tmp) {
+		pmi_fanout = atoi(tmp);
+		if (pmi_fanout < 1)
+			pmi_fanout = 32;
+	}
+
 	/* only send one message to each host, 
 	 * build table of the ports on each host */
 	START_TIMER;
@@ -194,9 +203,9 @@ static void *_agent(void *x)
 	for (i=0; i<args->barrier_xmit_cnt; i++) {
 		if (args->barrier_xmit_ptr[i].port == 0)
 			continue;	/* already sent message to host */
-		kvs_host_list = xmalloc(sizeof(struct kvs_hosts) * PMI_FANOUT);
+		kvs_host_list = xmalloc(sizeof(struct kvs_hosts) * pmi_fanout);
 		host_cnt = 0;
-#if PMI_FANOUT
+
 		/* This code enables key-pair forwarding between 
 		 * tasks. First task on the node gets the key-pairs
 		 * with host/port information for all other tasks on
@@ -214,10 +223,10 @@ static void *_agent(void *x)
 					args->barrier_xmit_ptr[j].hostname;
 			args->barrier_xmit_ptr[j].port = 0;/* don't reissue */
 			host_cnt++;
-			if (host_cnt >= PMI_FANOUT)
+			if (host_cnt >= pmi_fanout)
 				break;
 		}
-#endif
+
 		msg_sent++;
 		max_forward = MAX(host_cnt, max_forward);
 
@@ -413,6 +422,11 @@ extern int pmi_kvs_put(struct kvs_comm_set *kvs_set_ptr)
 extern int pmi_kvs_get(kvs_get_msg_t *kvs_get_ptr)
 {
 	int rc = SLURM_SUCCESS;
+#if _DEBUG_TIMING
+	static uint32_t tm[10000];
+	int cur_time, i;
+	struct timeval tv;
+#endif
 
 #if _DEBUG
 	info("pmi_kvs_get: rank:%u size:%u port:%u, host:%s", 
@@ -423,7 +437,12 @@ extern int pmi_kvs_get(kvs_get_msg_t *kvs_get_ptr)
 		error("PMK_KVS_Barrier reached with size == 0");
 		return SLURM_ERROR;
 	}
-
+#if _DEBUG_TIMING
+	gettimeofday(&tv, NULL);
+	cur_time = (tv.tv_sec % 1000) + tv.tv_usec;
+	if (kvs_get_ptr->task_id < 10000)
+		tm[kvs_get_ptr->task_id] = cur_time;
+#endif
 	pthread_mutex_lock(&kvs_mutex);
 	if (barrier_cnt == 0) {
 		barrier_cnt = kvs_get_ptr->size;
@@ -448,9 +467,18 @@ extern int pmi_kvs_get(kvs_get_msg_t *kvs_get_ptr)
 	barrier_ptr[kvs_get_ptr->task_id].port = kvs_get_ptr->port;
 	barrier_ptr[kvs_get_ptr->task_id].hostname = kvs_get_ptr->hostname;
 	kvs_get_ptr->hostname = NULL; /* just moved the pointer */
-	if (barrier_resp_cnt == barrier_cnt)
+	if (barrier_resp_cnt == barrier_cnt) {
+#if _DEBUG_TIMING
+		info("task[%d] at %u", 0, tm[0]);
+		for (i=1; ((i<barrier_cnt) && (i<10000)); i++) {
+			cur_time = (int) tm[i] - (int) tm[i-1];
+			info("task[%d] at %u diff %d", i, tm[i], cur_time);
+		}
+#endif
 		_kvs_xmit_tasks();
+}
 fini:	pthread_mutex_unlock(&kvs_mutex); 
+
 	return rc;
 }
 
diff --git a/src/api/slurm_pmi.c b/src/api/slurm_pmi.c
index 8a5fb2b21..f1c2454e4 100644
--- a/src/api/slurm_pmi.c
+++ b/src/api/slurm_pmi.c
@@ -36,6 +36,7 @@
 \*****************************************************************************/
 
 #include <stdlib.h>
+#include <sys/time.h>
 #include <slurm/slurm.h>
 #include <slurm/slurm_errno.h>
 
@@ -48,17 +49,77 @@
 #include "src/common/fd.h"
 #include "src/common/slurm_auth.h"
 
-#define MAX_RETRIES 5
+#define DEFAULT_PMI_TIME 500
+#define MAX_RETRIES      5
 
 int pmi_fd = -1;
 int pmi_time = 0;
 uint16_t srun_port = 0;
 slurm_addr srun_addr;
 
-static int _forward_comm_set(struct kvs_comm_set *kvs_set_ptr);
-static int _get_addr(void);
+static void _delay_rpc(int pmi_rank, int pmi_size);
+static int  _forward_comm_set(struct kvs_comm_set *kvs_set_ptr);
+static int  _get_addr(void);
 static void _set_pmi_time(void);
 
+/* Delay an RPC to srun in order to avoid overwhelming the srun command.
+ * The delay is based upon the number of tasks, this task's rank, and PMI_TIME.
+ * This logic depends upon synchronized clocks across the cluster. */
+static void _delay_rpc(int pmi_rank, int pmi_size)
+{
+	struct timeval tv1, tv2;
+	uint32_t cur_time;	/* current time in usec (just 9 digits) */
+	uint32_t tot_time;	/* total time expected for all RPCs */
+	uint32_t offset_time;	/* relative time within tot_time */
+	uint32_t target_time;	/* desired time to issue the RPC */
+	uint32_t delta_time, error_time;
+	int retries = 0;
+
+	_set_pmi_time();
+
+again:	if (gettimeofday(&tv1, NULL)) {
+		usleep(pmi_rank * pmi_time);
+		return;
+	}
+
+	cur_time = (tv1.tv_sec % 1000) + tv1.tv_usec;
+	tot_time = pmi_size * pmi_time;
+	offset_time = cur_time % tot_time;
+	target_time = pmi_rank * pmi_time;
+	if (target_time < offset_time)
+		delta_time = target_time - offset_time + tot_time;
+	else
+		delta_time = target_time - offset_time;
+	if (usleep(delta_time)) {
+		if (errno == EINVAL)
+			usleep(900000);
+		/* errno == EINTR */
+		goto again;
+	}
+
+	/* Verify we are active at the right time. If current time is different
+	 * from target by more than 15*pmi_time, then start over. If PMI_TIME 
+	 * is set appropriately, then srun should have no more than 30 RPCs
+	 * in the queue at one time in the worst case. */
+	if (gettimeofday(&tv2, NULL))
+		return;
+	tot_time = (tv2.tv_sec - tv1.tv_sec) * 1000000;
+	tot_time += tv2.tv_usec;
+	tot_time -= tv1.tv_usec;
+	if (tot_time >= delta_time)
+		error_time = tot_time - delta_time;
+	else
+		error_time = delta_time - tot_time;
+	if (error_time > (15*pmi_time)) {	/* too far off */
+#if 0
+		info("delta=%u tot=%u err=%u", 
+			delta_time, tot_time, error_time);
+#endif
+		if ((++retries) <= 2)
+			goto again;
+	}
+}
+
 static int _get_addr(void)
 {
 	char *env_host, *env_port;
@@ -85,14 +146,14 @@ static void _set_pmi_time(void)
 
 	tmp = getenv("PMI_TIME");
 	if (tmp == NULL) {
-		pmi_time = 500;
+		pmi_time = DEFAULT_PMI_TIME;
 		return;
 	}
 
 	pmi_time = strtol(tmp, &endptr, 10);
 	if ((pmi_time < 0) || (endptr[0] != '\0')) {
 		error("Invalid PMI_TIME: %s", tmp);
-		pmi_time = 500;
+		pmi_time = DEFAULT_PMI_TIME;
 	}
 }
 
@@ -123,9 +184,11 @@ int slurm_send_kvs_comm_set(struct kvs_comm_set *kvs_set_ptr,
 	 * command is very overloaded.
 	 * We also increase the timeout (default timeout is
 	 * 10 secs). */
-	usleep(pmi_rank * pmi_time);
-	if      (pmi_size > 1000)	/* 100 secs */
-		timeout = slurm_get_msg_timeout() * 10000;
+	_delay_rpc(pmi_rank, pmi_size);
+	if      (pmi_size > 4000)	/* 240 secs */
+		timeout = slurm_get_msg_timeout() * 24000;
+	else if (pmi_size > 1000)	/* 120 secs */
+		timeout = slurm_get_msg_timeout() * 12000;
 	else if (pmi_size > 100)	/* 50 secs */
 		timeout = slurm_get_msg_timeout() * 5000;
 	else if (pmi_size > 10)		/* 20 secs */
@@ -135,8 +198,9 @@ int slurm_send_kvs_comm_set(struct kvs_comm_set *kvs_set_ptr,
 		if (retries++ > MAX_RETRIES) {
 			error("slurm_send_kvs_comm_set: %m");
 			return SLURM_ERROR;
-		}
-		usleep(pmi_rank * pmi_time);
+		} else
+			debug("send_kvs retry %d", retries);
+		_delay_rpc(pmi_rank, pmi_size);
 	}
 
 	return rc;
@@ -153,9 +217,10 @@ int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
 	uint16_t port;
 	kvs_get_msg_t data;
 	char *env_pmi_ifhn;
-	
+
 	if (kvs_set_ptr == NULL)
 		return EINVAL;
+	*kvs_set_ptr = NULL;	/* initialization */
 
 	if ((rc = _get_addr()) != SLURM_SUCCESS) {
 		error("_get_addr: %m");
@@ -202,18 +267,14 @@ int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
 	 * Also increase the message timeout if many tasks 
 	 * since the srun command can get very overloaded (the
 	 * default timeout is 10 secs).
-	 *
-	 * TaskID  SendTime  GetTime  (Units are PMI_TIME, default=500 usec)
-	 *      0         0      N+0
-	 *      1         1      N+1
-	 *      2         2      N+2
-	 *    N-1       N-1      N+N-1
 	 */
-	usleep(pmi_size * pmi_time);
-	if      (pmi_size > 1000)	/* 100 secs */
-		timeout = slurm_get_msg_timeout() * 10000;
-	else if (pmi_size > 100)	/* 50 secs */
-		timeout = slurm_get_msg_timeout() * 5000;
+	_delay_rpc(pmi_rank, pmi_size);
+	if      (pmi_size > 4000)	/* 240 secs */
+		timeout = slurm_get_msg_timeout() * 24000;
+	else if (pmi_size > 1000)	/* 120 secs */
+		timeout = slurm_get_msg_timeout() * 12000;
+	else if (pmi_size > 100)	/* 60 secs */
+		timeout = slurm_get_msg_timeout() * 6000;
 	else if (pmi_size > 10)		/* 20 secs */
 		timeout = slurm_get_msg_timeout() * 2000;
 
@@ -221,8 +282,9 @@ int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
 		if (retries++ > MAX_RETRIES) {
 			error("slurm_get_kvs_comm_set: %m");
 			return SLURM_ERROR;
-		}
-		usleep(pmi_rank * pmi_time);
+		} else
+			debug("get kvs retry %d", retries);
+		_delay_rpc(pmi_rank, pmi_size);
 	}
 	if (rc != SLURM_SUCCESS) {
 		error("slurm_get_kvs_comm_set error_code=%d", rc);
@@ -262,7 +324,8 @@ int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
 }
 
 /* Forward keypair info to other tasks as required.
-* Clear message forward structure upon completion. */
+ * Clear message forward structure upon completion. 
+ * The messages are forwarded sequentially. */
 static int _forward_comm_set(struct kvs_comm_set *kvs_set_ptr)
 {
 	int i, rc = SLURM_SUCCESS;
diff --git a/src/common/env.c b/src/common/env.c
index 36ce6d9ba..aeecc36a6 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  src/common/env.c - add an environment variable to environment vector
- *  $Id: env.c 11774 2007-07-02 20:13:14Z jette $
+ *  $Id: env.c 12233 2007-09-07 22:03:42Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -294,7 +294,7 @@ int setup_env(env_t *env)
 	if (env->cpus_on_node 
 	   && setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d", 
 		      env->cpus_on_node) ) {
-		error("Unable to set SLURM_CPUS_PER_TASK");
+		error("Unable to set SLURM_CPUS_ON_NODE");
 		rc = SLURM_FAILURE;
 	} 
 
diff --git a/src/common/hostlist.c b/src/common/hostlist.c
index eb0c3818d..82454eb34 100644
--- a/src/common/hostlist.c
+++ b/src/common/hostlist.c
@@ -1,5 +1,5 @@
 /*****************************************************************************\
- *  $Id: hostlist.c 12180 2007-08-30 18:06:57Z jette $
+ *  $Id: hostlist.c 12391 2007-09-22 00:23:04Z da $
  *****************************************************************************
  *  $LSDId: hostlist.c,v 1.14 2003/10/14 20:11:54 grondo Exp $
  *****************************************************************************
@@ -277,7 +277,7 @@ static char * _next_tok(char *, char **);
 static int    _zero_padded(unsigned long, int);
 static int    _width_equiv(unsigned long, int *, unsigned long, int *);
 
-static size_t        host_prefix_end(const char *);
+static int           host_prefix_end(const char *);
 static hostname_t    hostname_create(const char *);
 static void          hostname_destroy(hostname_t);
 static int           hostname_suffix_is_valid(hostname_t);
@@ -528,15 +528,21 @@ static int _width_equiv(unsigned long n, int *wn, unsigned long m, int *wm)
 /* 
  * return the location of the last char in the hostname prefix
  */
-static size_t host_prefix_end(const char *hostname)
+static int host_prefix_end(const char *hostname)
 {
-	size_t idx; 
-	if (!hostname)
-		return -1;
+	int idx, len;
+
+	assert(hostname != NULL);
+
+	len = strlen(hostname);
 #ifdef HAVE_BG
-	idx = strlen(hostname) - 4;
+	if (len < 4)
+		return -1;
+	idx = len - 4;
 #else
-	idx = strlen(hostname) - 1;
+	if (len < 1)
+		return -1;
+	idx = len - 1;
 
 	while (idx >= 0 && isdigit((char) hostname[idx])) 
 		idx--;
@@ -551,7 +557,7 @@ static hostname_t hostname_create(const char *hostname)
 {
 	hostname_t hn = NULL;
 	char *p = '\0';
-	size_t idx = 0;
+	int idx = 0;
 
 	assert(hostname != NULL);
 
@@ -569,7 +575,7 @@ static hostname_t hostname_create(const char *hostname)
 	hn->prefix = NULL;
 	hn->suffix = NULL;
 
-	if (idx == strlen(hostname) - 1) {
+	if (idx == (strlen(hostname) - 1)) {
 		if ((hn->prefix = strdup(hostname)) == NULL) {
 			hostname_destroy(hn);
 			out_of_memory("hostname prefix create");
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 6ef5baeed..b1dd0d7da 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -238,7 +238,7 @@ static int parse_slurmd_port(void **dest, slurm_parser_enum_t type,
 		return -1;
 	}
 
-	default_slurmd_port = (uint32_t)num;
+	default_slurmd_port = (uint16_t) num;
 
 	ptr = (uint32_t *)xmalloc(sizeof(uint32_t));
 	*ptr = (uint32_t)num;
@@ -917,6 +917,8 @@ extern uint16_t slurm_conf_get_port(const char *node_name)
 	while (p) {
 		if (strcmp(p->alias, node_name) == 0) {
 			uint16_t port = p->port;
+			if ((!port || port == SLURMD_PORT))
+				port = default_slurmd_port;
 			slurm_conf_unlock();
 			return port;
 		}
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c
index 2c697347d..bb0eac94b 100644
--- a/src/common/slurm_protocol_api.c
+++ b/src/common/slurm_protocol_api.c
@@ -804,10 +804,10 @@ int slurm_receive_msg(slurm_fd fd, slurm_msg_t *msg, int timeout)
                 timeout  = slurm_get_msg_timeout() * 1000; 
 
 	else if(timeout > (slurm_get_msg_timeout() * 10000)) {
-		error("You are sending a message with very long "
+		debug("You are receiving a message with very long "
 		      "timeout of %d seconds", (timeout/1000));
 	} else if(timeout < 1000) {
-		error("You are sending a message with a very short "
+		error("You are receiving a message with a very short "
 		      "timeout of %d msecs", timeout);
 	} 
 	
@@ -950,7 +950,7 @@ List slurm_receive_msgs(slurm_fd fd, int steps, int timeout)
 	debug4("orig_timeout was %d we have %d steps and a timeout of %d",
 	       orig_timeout, steps, timeout);
 	if(orig_timeout >= (slurm_get_msg_timeout() * 10000)) {
-		error("slurm_receive_msgs: "
+		debug("slurm_receive_msgs: "
 		      "You are sending a message with timeout's greater "
 		      "than %d seconds, your's is %d seconds", 
 		      (slurm_get_msg_timeout() * 10), 
@@ -1111,7 +1111,7 @@ int slurm_receive_msg_and_forward(slurm_fd fd, slurm_addr *orig_addr,
                 timeout  = slurm_get_msg_timeout() * 1000; 
 		
 	if(timeout >= (slurm_get_msg_timeout() * 10000)) {
-		error("slurm_receive_msg_and_forward: "
+		debug("slurm_receive_msg_and_forward: "
 		      "You are sending a message with timeout's greater "
 		      "than %d seconds, your's is %d seconds", 
 		      (slurm_get_msg_timeout() * 10), 
diff --git a/src/common/timers.c b/src/common/timers.c
index 949382e7e..421cdae91 100644
--- a/src/common/timers.c
+++ b/src/common/timers.c
@@ -54,7 +54,7 @@ inline void diff_tv_str(struct timeval *tv1,struct timeval *tv2,
 	delta_t +=  tv2->tv_usec - tv1->tv_usec;
 	snprintf(tv_str, len_tv_str, "usec=%ld", delta_t);
 	if (delta_t > 1000000)
-		info("Warning: Note very large processing time: %s",tv_str); 
+		verbose("Warning: Note very large processing time: %s",tv_str); 
 }
 
 /*
diff --git a/src/plugins/jobacct/common/common_slurmstepd.c b/src/plugins/jobacct/common/common_slurmstepd.c
index 2426dcbd3..22b1222cf 100644
--- a/src/plugins/jobacct/common/common_slurmstepd.c
+++ b/src/plugins/jobacct/common/common_slurmstepd.c
@@ -62,7 +62,7 @@ extern int common_set_proctrack_container_id(uint32_t id)
 		info("Warning: jobacct: set_proctrack_container_id: "
 		     "cont_id is already set to %d you are setting it to %d",
 		     cont_id, id);
-	if(id <= 0) {
+	if((int)id <= 0) {
 		error("jobacct: set_proctrack_container_id: "
 		      "I was given most likely an unset cont_id %d",
 		      id);
diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c
index 1be8fef2a..96784b0b7 100644
--- a/src/plugins/mpi/mvapich/mvapich.c
+++ b/src/plugins/mpi/mvapich/mvapich.c
@@ -230,7 +230,7 @@ static int startup_timeout (mvapich_state_t *st)
 	now = time (NULL);
 
 	if (!st->start_time)
-		st->start_time = now;
+		return (-1);
 
 	remaining = st->timeout - (now - st->start_time);
 
@@ -282,19 +282,24 @@ static int mvapich_write_n (mvapich_state_t *st, struct mvapich_info *mvi,
 {
 	int nleft = len;
 	int n = 0;
+	unsigned char * p = buf;
 
-	while (nleft) {
+	while (nleft > 0) {
 		/* Poll for write-activity */
 		if (mvapich_poll (st, mvi, 1) < 0)
 			return (-1);
 
-		 if ((n = fd_write_n (mvi->fd, buf, len)) < 0 && (errno != EINTR))
+		if ((n = write (mvi->fd, p, nleft)) < 0) {
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
 			return (-1);
+		}
 
 		nleft -= n;
+		p += n;
 	}
 
-	 return (n);
+	return (len - nleft);
 }
 
 static int mvapich_read_n (mvapich_state_t *st,  struct mvapich_info *mvi,
@@ -302,19 +307,27 @@ static int mvapich_read_n (mvapich_state_t *st,  struct mvapich_info *mvi,
 {
 	int nleft = len;
 	int n = 0;
+	unsigned char * p = buf;
 
-	while (nleft) {
+	while (nleft > 0) {
 		/* Poll for write-activity */
 		if (mvapich_poll (st, mvi, 0) < 0)
 			return (-1);
 
-		 if ((n = fd_read_n (mvi->fd, buf, len)) < 0 && (errno != EINTR))
+		if ((n = read (mvi->fd, p, nleft)) < 0) { 
+			if (errno == EAGAIN || errno == EINTR)
+				continue;
+			return (-1);
+		}
+
+		if (n == 0) /* unexpected EOF */
 			return (-1);
 
 		nleft -= n;
+		p += n;
 	}
 
-	 return (n);
+	return (len - nleft);
 }
 
 
@@ -340,6 +353,8 @@ static int mvapich_abort_sends_rank (mvapich_state_t *st)
 static int mvapich_get_task_info (mvapich_state_t *st,
 				  struct mvapich_info *mvi)
 {
+	mvi->do_poll = 0;
+
 	if (mvapich_read_n (st, mvi, &mvi->addrlen, sizeof (int)) <= 0)
 		return error ("mvapich: Unable to read addrlen for rank %d: %m", 
 				mvi->rank);
@@ -365,8 +380,6 @@ static int mvapich_get_task_info (mvapich_state_t *st,
 				mvi->rank);
 	}
 
-	mvi->do_poll = 0;
-
 	return (0);
 }
 
@@ -1133,6 +1146,10 @@ again:
 		if (first) {
 			mvapich_debug ("first task checked in");
 			do_timings (st);
+			/*
+			 *  Officially start timeout timer now.
+			 */
+			st->start_time = time(NULL);
 			first = 0;
 		}
 
diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c
index c8b06a05f..804d289cd 100644
--- a/src/plugins/sched/backfill/backfill.c
+++ b/src/plugins/sched/backfill/backfill.c
@@ -606,15 +606,18 @@ _backfill_part(part_specs_t *part_specs)
 	/* find job to possibly backfill */
 	pend_job_iterate = list_iterator_create(pend_job_list);
 	while ( (pend_job_ptr = list_next(pend_job_iterate)) ) {
+		if (first_job == NULL)
+			first_job = pend_job_ptr;
+
 		if (_loc_restrict(pend_job_ptr, part_specs)) {
 #if __DEBUG
 			info("Job %u has locality restrictions",
 				pend_job_ptr->job_id);
 #endif
-			break;
+			continue;
 		}
 
-		if (first_job == NULL) {
+		if (first_job == pend_job_ptr) {
 			if (pend_job_ptr->details == NULL)
 				break;
 			if (pend_job_ptr->details->min_nodes <= 
@@ -625,7 +628,6 @@ _backfill_part(part_specs_t *part_specs)
 #endif
 				break;
 			}
-			first_job = pend_job_ptr;
 		}
 
 		if (_update_node_space_map(pend_job_ptr)) {
diff --git a/src/plugins/sched/wiki/msg.c b/src/plugins/sched/wiki/msg.c
index 01e0c5fb4..0f9b9e9b3 100644
--- a/src/plugins/sched/wiki/msg.c
+++ b/src/plugins/sched/wiki/msg.c
@@ -54,6 +54,7 @@ char     auth_key[KEY_SIZE] = "";
 char     e_host[E_HOST_SIZE] = "";
 char     e_host_bu[E_HOST_SIZE] = "";
 uint16_t e_port = 0;
+struct   part_record *exclude_part_ptr[EXC_PART_CNT];
 uint16_t job_aggregation_time = 10;	/* Default value is 10 seconds */
 int      init_prio_mode = PRIO_HOLD;
 uint16_t kill_wait;
@@ -228,15 +229,20 @@ extern int parse_wiki_config(void)
 		{"EHost", S_P_STRING},
 		{"EHostBackup", S_P_STRING},
 		{"EPort", S_P_UINT16},
+		{"ExcludePartitions", S_P_STRING},
 		{"JobAggregationTime", S_P_UINT16},
 		{"JobPriority", S_P_STRING}, 
 		{NULL} };
 	s_p_hashtbl_t *tbl;
+	char *exclude_partitions;
 	char *key = NULL, *priority_mode = NULL, *wiki_conf;
 	struct stat buf;
 	slurm_ctl_conf_t *conf;
+	int i;
 
 	/* Set default values */
+	for (i=0; i<EXC_PART_CNT; i++)
+		exclude_part_ptr[i] = NULL;
 	conf = slurm_conf_lock();
 	strncpy(e_host, conf->control_addr, sizeof(e_host));
 	if (conf->backup_addr) {
@@ -276,6 +282,25 @@ extern int parse_wiki_config(void)
 	s_p_get_uint16(&e_port, "EPort", tbl);
 	s_p_get_uint16(&job_aggregation_time, "JobAggregationTime", tbl); 
 
+	if (s_p_get_string(&exclude_partitions, "ExcludePartitions", tbl)) {
+		char *tok, *tok_p;
+		tok = strtok_r(exclude_partitions, ",", &tok_p);
+		i = 0;
+		while (tok) {
+			if (i >= EXC_PART_CNT) {
+				error("ExcludePartitions has too many entries "
+				      "skipping %s and later entries", tok);
+				break;
+			}	
+			exclude_part_ptr[i] = find_part_record(tok);
+			if (exclude_part_ptr[i])
+				i++;
+			else
+				error("ExcludePartitions %s not found", tok);
+			tok = strtok_r(NULL, ",", &tok_p);
+		}
+	}
+
 	if (s_p_get_string(&priority_mode, "JobPriority", tbl)) {
 		if (strcasecmp(priority_mode, "hold") == 0)
 			init_prio_mode = PRIO_HOLD;
diff --git a/src/plugins/sched/wiki/msg.h b/src/plugins/sched/wiki/msg.h
index 12fdec816..76458521a 100644
--- a/src/plugins/sched/wiki/msg.h
+++ b/src/plugins/sched/wiki/msg.h
@@ -79,9 +79,11 @@
 #include "src/common/xmalloc.h"
 #include "src/common/xsignal.h"
 #include "src/common/xstring.h"
+#include "src/slurmctld/slurmctld.h"
 
 /* Global configuration parameters */
 #define E_HOST_SIZE  256
+#define EXC_PART_CNT  10
 #define KEY_SIZE      32
 #define PRIO_HOLD      0
 #define PRIO_DECREMENT 1
@@ -90,6 +92,7 @@ extern char 	auth_key[KEY_SIZE];
 extern char	e_host[E_HOST_SIZE];
 extern char	e_host_bu[E_HOST_SIZE];
 extern uint16_t	e_port;
+extern struct	part_record *exclude_part_ptr[EXC_PART_CNT];
 extern uint16_t	job_aggregation_time;
 extern uint16_t kill_wait;
 extern uint16_t use_host_exp;
diff --git a/src/plugins/sched/wiki/sched_wiki.c b/src/plugins/sched/wiki/sched_wiki.c
index f01f1badf..64606a366 100644
--- a/src/plugins/sched/wiki/sched_wiki.c
+++ b/src/plugins/sched/wiki/sched_wiki.c
@@ -84,13 +84,30 @@ extern uint32_t
 slurm_sched_plugin_initial_priority( uint32_t last_prio,
 				     struct job_record *job_ptr )
 {
+	if (exclude_part_ptr[0]) {
+		/* Interactive job (initiated by srun) in partition
+		 * excluded from Moab scheduling */
+		int i;
+		static int exclude_prio = 100000000;
+		for (i=0; i<EXC_PART_CNT; i++) {
+			if (exclude_part_ptr[i] == NULL)
+				break;
+			if (exclude_part_ptr[i] == job_ptr->part_ptr) {
+				debug("Scheduling job %u directly (no Maui)", 
+					job_ptr->job_id);
+				return (exclude_prio--);
+			}
+		}
+		return 0;
+	}
+
 	if (init_prio_mode == PRIO_DECREMENT) {
 		if (last_prio >= 2)
 			return (last_prio - 1);
 		else
 			return 1;
-	} else 
-		return 0;
+	}
+	return 0;
 }
 
 /**************************************************************************/
diff --git a/src/plugins/sched/wiki2/hostlist.c b/src/plugins/sched/wiki2/hostlist.c
index 3818d023d..c31dc6bf8 100644
--- a/src/plugins/sched/wiki2/hostlist.c
+++ b/src/plugins/sched/wiki2/hostlist.c
@@ -86,7 +86,11 @@ extern char * moab2slurm_task_list(char *moab_tasklist, int *task_cnt)
 	}
 
 	*task_cnt = 0;
+
+	/* Moab format 2 if string contains '*' or '[' */ 
 	tmp1 = strchr(moab_tasklist, (int) '*');
+	if (tmp1 == NULL)
+		tmp1 = strchr(moab_tasklist, (int) '[');
 
 	if (tmp1 == NULL) {	/* Moab format 1 */
 		slurm_tasklist = xstrdup(moab_tasklist);
@@ -191,7 +195,8 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
 {
 	int host_str_len = 4096;
 	char *host_str, tmp_str[64];
-	char *sep, *tok, *tok_p;
+	char *tok, *sep;
+	int i, in_bracket = 0, fini = 0;
 
 	host_str = xmalloc(host_str_len);
 	hostlist_uniq(*hl_tmp);
@@ -199,19 +204,39 @@ static void _append_hl_buf(char **buf, hostlist_t *hl_tmp, int *reps)
 		host_str_len *= 2;
 		xrealloc(*host_str, host_str_len);
 	}
+
 	/* Note that host_str may be of this form "alpha,beta". We want
-	 * to record this as "alpha*#:beta*#" and NOT "alpha,beta*#" */
+	 * to record this as "alpha*#:beta*#" and NOT "alpha,beta*#". 
+	 * NOTE: Do not break up command within brackets (e.g. "tux[1,2-4]") */
 	if (*buf)
 		sep = ":";
 	else
 		sep = "";
-	tok = strtok_r(host_str, ",", &tok_p);
-	while (tok) {
-		snprintf(tmp_str, sizeof(tmp_str), "%s%s*%d",
-			sep, tok, *reps);
-		xstrcat(*buf, tmp_str);
-		sep = ":";
-		tok = strtok_r(NULL, ",", &tok_p);
+	tok = host_str;
+	for (i=0; (fini == 0) ; i++) {
+		switch (tok[i]) {
+			case '[':
+				in_bracket = 1;
+				break;
+			case ']':
+				in_bracket = 0;
+				break;
+			case '\0':
+				fini = 1;
+				if (in_bracket)
+					error("badly formed hostlist %s", tok);
+			case ',':
+				if (in_bracket)
+					break;
+				tok[i] = '\0';
+				snprintf(tmp_str, sizeof(tmp_str), "%s%s*%d",
+					sep, tok, *reps);
+				xstrcat(*buf, tmp_str);
+				sep = ":";
+				tok += (i + 1);
+				i = -1;
+				break;
+		}
 	}
 	xfree(host_str);
 	hostlist_destroy(*hl_tmp);
diff --git a/src/plugins/sched/wiki2/start_job.c b/src/plugins/sched/wiki2/start_job.c
index 6c578bdf1..224f05da8 100644
--- a/src/plugins/sched/wiki2/start_job.c
+++ b/src/plugins/sched/wiki2/start_job.c
@@ -141,7 +141,6 @@ static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
 	size_t node_name_len;
 	static uint32_t cr_test = 0, cr_enabled = 0;
 
-info("task_cnt=%d, hostlist=%s, tasklist=%s", task_cnt, hostlist, tasklist);
 	if (cr_test == 0) {
 		select_g_get_info_from_plugin(SELECT_CR_PLUGIN,
 						&cr_enabled);
diff --git a/src/plugins/select/bluegene/block_allocator/Makefile.am b/src/plugins/select/bluegene/block_allocator/Makefile.am
index a6d5d6151..1d003724a 100644
--- a/src/plugins/select/bluegene/block_allocator/Makefile.am
+++ b/src/plugins/select/bluegene/block_allocator/Makefile.am
@@ -6,16 +6,17 @@ CLEANFILES = core.*
 INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
 
 #to build the executable
-# noinst_PROGRAMS = partition_allocator
+# noinst_PROGRAMS = block_allocator
 
-# partition_allocator_SOURCES = block_allocator.c api_linker.c \
-#	block_allocator.h
+# block_allocator_SOURCES = block_allocator.c bridge_linker.c \
+# 	block_allocator.h bridge_linker.h
 
-# partition_allocator_LDADD = \
-# 	$(top_builddir)/src/common/libcommon.la -lpthread \
+# block_allocator_LDADD = \
+# 	$(top_builddir)/src/common/libcommon.la \
 # 	$(top_builddir)/src/api/libslurm.la
 
-# partition_allocator_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS)
+
+# block_allocator_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS)
 
 # CPPFLAGS = -DBUILD_EXE
 
diff --git a/src/plugins/select/bluegene/block_allocator/Makefile.in b/src/plugins/select/bluegene/block_allocator/Makefile.in
index 97637d185..d08331741 100644
--- a/src/plugins/select/bluegene/block_allocator/Makefile.in
+++ b/src/plugins/select/bluegene/block_allocator/Makefile.in
@@ -250,16 +250,16 @@ CLEANFILES = core.*
 INCLUDES = -I$(top_srcdir) $(BG_INCLUDES)
 
 #to build the executable
-# noinst_PROGRAMS = partition_allocator
+# noinst_PROGRAMS = block_allocator
 
-# partition_allocator_SOURCES = block_allocator.c api_linker.c \
-#	block_allocator.h
+# block_allocator_SOURCES = block_allocator.c bridge_linker.c \
+# 	block_allocator.h bridge_linker.h
 
-# partition_allocator_LDADD = \
-# 	$(top_builddir)/src/common/libcommon.la -lpthread \
+# block_allocator_LDADD = \
+# 	$(top_builddir)/src/common/libcommon.la \
 # 	$(top_builddir)/src/api/libslurm.la
 
-# partition_allocator_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS)
+# block_allocator_LDFLAGS = -export-dynamic -lm $(CMD_LDFLAGS)
 
 # CPPFLAGS = -DBUILD_EXE
 
diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c
index 4a58f9a8e..d9c6f3019 100644
--- a/src/plugins/select/bluegene/block_allocator/block_allocator.c
+++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c
@@ -1,7 +1,7 @@
 /*****************************************************************************\
  *  block_allocator.c - Assorted functions for layout of bglblocks, 
  *	 wiring, mapping for smap, etc.
- *  $Id: block_allocator.c 11985 2007-08-09 23:07:08Z da $
+ *  $Id: block_allocator.c 12382 2007-09-21 21:13:23Z da $
  *****************************************************************************
  *  Copyright (C) 2004 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -170,9 +170,9 @@ static int _find_next_free_using_port_2(ba_switch_t *curr_switch,
 					List nodes, int dim, 
 					int count);
 /* */
-static int _find_passthrough(ba_switch_t *curr_switch, int source_port, 
-			     List nodes, int dim, 
-			     int count, int highest_phys_x); 
+/* static int _find_passthrough(ba_switch_t *curr_switch, int source_port,  */
+/* 			     List nodes, int dim,  */
+/* 			     int count, int highest_phys_x);  */
 /* */
 static int _finish_torus(ba_switch_t *curr_switch, int source_port, 
 			   List nodes, int dim, int count, int *start);
@@ -2895,14 +2895,15 @@ extern int set_bp_map(void)
 		
 		number = xstrntol(bp_id+1, NULL,
 				  BA_SYSTEM_DIMENSIONS, HOSTLIST_BASE);
-		if(DIM_SIZE[X] > bp_loc.X
-		   && DIM_SIZE[Y] > bp_loc.Y
-		   && DIM_SIZE[Z] > bp_loc.Z)
-			ba_system_ptr->grid
-				[bp_loc.X]
-				[bp_loc.Y]
-				[bp_loc.Z].phys_x =
-				number / (HOSTLIST_BASE * HOSTLIST_BASE);
+/* no longer needed for calculation */
+/* 		if(DIM_SIZE[X] > bp_loc.X */
+/* 		   && DIM_SIZE[Y] > bp_loc.Y */
+/* 		   && DIM_SIZE[Z] > bp_loc.Z) */
+/* 			ba_system_ptr->grid */
+/* 				[bp_loc.X] */
+/* 				[bp_loc.Y] */
+/* 				[bp_loc.Z].phys_x = */
+/* 				number / (HOSTLIST_BASE * HOSTLIST_BASE); */
 		
 		list_push(bp_map_list, bp_map);
 		
@@ -3501,7 +3502,7 @@ static int _set_external_wires(int dim, int count, ba_node_t* source,
 		switch(count) {
 		case 0:
 		case 6:
-			/* 0 and 4th Node no split */
+			/* 0 and 6th Node no split */
 		case 8:
 		case 9:
 		case 10:
@@ -3647,8 +3648,9 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 	int i = 0;
 	ba_node_t *next_node = NULL;
 	ba_node_t *check_node = NULL;
-	int highest_phys_x = geometry[X] - start[X];
-	
+/* 	int highest_phys_x = geometry[X] - start[X]; */
+/* 	info("highest_phys_x is %d", highest_phys_x); */
+
 	ListIterator itr;
 
 	if(!ba_node)
@@ -3666,6 +3668,7 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 	}
 	debug3("found - %d",found);
 	for(i=0;i<2;i++) {
+		/* info("trying port %d", ports_to_try[i]); */
 		/* check to make sure it isn't used */
 		if(!curr_switch->int_wire[ports_to_try[i]].used) {
 			/* looking at the next node on the switch 
@@ -3674,7 +3677,11 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 				ext_wire[ports_to_try[i]].node_tar;
 			port_tar = curr_switch->
 				ext_wire[ports_to_try[i]].port_tar;
-
+			/* info("goes to %c%c%c port %d", */
+/* 			       alpha_num[node_tar[X]], */
+/* 			       alpha_num[node_tar[Y]], */
+/* 			       alpha_num[node_tar[Z]], */
+/* 			       port_tar); */
 			/* check to see if we are back at the start of the
 			   block */
 			if((node_tar[X] == 
@@ -3738,65 +3745,70 @@ static int _find_x_path(List results, ba_node_t *ba_node,
 			}
 			debug3("Broke = %d Found = %d geometry[X] = %d",
 			       broke, found, geometry[X]);
-			debug3("Next Phys X %d Highest X %d",
-			       next_node->phys_x, highest_phys_x);
-			if(next_node->phys_x >= highest_phys_x) {
-				debug3("looking for a passthrough");
-				if(best_path)
-					list_destroy(best_path);
-				best_path = list_create(_delete_path_list);
-				if(path)
-					list_destroy(path);
-				path = list_create(_delete_path_list);
+
+/* This doesnt' appear to be of any use since we are doing a circular
+ * system not a linear one.  Kept just to make sure.
+ */
+
+/* 			debug3("Next Phys X %d Highest X %d", */
+/* 			       next_node->phys_x, highest_phys_x); */
+/* 			if(next_node->phys_x >= highest_phys_x) { */
+/* 				debug3("looking for a passthrough"); */
+/* 				if(best_path) */
+/* 					list_destroy(best_path); */
+/* 				best_path = list_create(_delete_path_list); */
+/* 				if(path) */
+/* 					list_destroy(path); */
+/* 				path = list_create(_delete_path_list); */
 	
-				_find_passthrough(curr_switch,
-						  0,
-						  results,
-						  X,
-						  0,
-						  highest_phys_x);
-				if(best_count < BEST_COUNT_INIT) {
-					debug2("yes found next free %d", 
-					       best_count);
-					node_tar = _set_best_path();
-					next_node = &ba_system_ptr->
-						grid[node_tar[X]]
-#ifdef HAVE_BG
-						[node_tar[Y]]
-						[node_tar[Z]]
-#endif
-						;
-					next_switch = 
-						&next_node->axis_switch[X];
+/* 				_find_passthrough(curr_switch, */
+/* 						  0, */
+/* 						  results, */
+/* 						  X, */
+/* 						  0, */
+/* 						  highest_phys_x); */
+/* 				if(best_count < BEST_COUNT_INIT) { */
+/* 					debug2("yes found next free %d",  */
+/* 					       best_count); */
+/* 					node_tar = _set_best_path(); */
+/* 					next_node = &ba_system_ptr-> */
+/* 						grid[node_tar[X]] */
+/* #ifdef HAVE_BG */
+/* 						[node_tar[Y]] */
+/* 						[node_tar[Z]] */
+/* #endif */
+/* 						; */
+/* 					next_switch =  */
+/* 						&next_node->axis_switch[X]; */
 					
-#ifdef HAVE_BG
-					debug2("found %d looking at "
-					       "%c%c%c going to %c%c%c %d",
-					       found,
-					       alpha_num[ba_node->coord[X]],
-					       alpha_num[ba_node->coord[Y]],
-					       alpha_num[ba_node->coord[Z]],
-					       alpha_num[node_tar[X]],
-					       alpha_num[node_tar[Y]],
-					       alpha_num[node_tar[Z]],
-					       port_tar);
-#endif		
-					list_append(results, next_node);
-					found++;
-					if(_find_x_path(results, next_node, 
-							start, first, geometry,
-							found, conn_type)) {
-						return 1;
-					} else {
-						found--;
-						_reset_the_path(curr_switch, 0,
-								1, X);
-						_remove_node(results, 
-							     next_node->coord);
-						return 0;
-					}
-				}
-			}			
+/* #ifdef HAVE_BG */
+/* 					debug2("found %d looking at " */
+/* 					       "%c%c%c going to %c%c%c %d", */
+/* 					       found, */
+/* 					       alpha_num[ba_node->coord[X]], */
+/* 					       alpha_num[ba_node->coord[Y]], */
+/* 					       alpha_num[ba_node->coord[Z]], */
+/* 					       alpha_num[node_tar[X]], */
+/* 					       alpha_num[node_tar[Y]], */
+/* 					       alpha_num[node_tar[Z]], */
+/* 					       port_tar); */
+/* #endif		 */
+/* 					list_append(results, next_node); */
+/* 					found++; */
+/* 					if(_find_x_path(results, next_node,  */
+/* 							start, first, geometry, */
+/* 							found, conn_type)) { */
+/* 						return 1; */
+/* 					} else { */
+/* 						found--; */
+/* 						_reset_the_path(curr_switch, 0, */
+/* 								1, X); */
+/* 						_remove_node(results,  */
+/* 							     next_node->coord); */
+/* 						return 0; */
+/* 					} */
+/* 				} */
+/* 			}			 */
 
 			if(broke && (found == geometry[X])) {
 				goto found_path;
@@ -4433,204 +4445,204 @@ return_0:
 	return 0;
 }
 
-static int _find_passthrough(ba_switch_t *curr_switch, int source_port, 
-			     List nodes, int dim, int count, int highest_phys_x) 
-{
-	ba_switch_t *next_switch = NULL; 
-	ba_path_switch_t *path_add = 
-		(ba_path_switch_t *) xmalloc(sizeof(ba_path_switch_t));
-	ba_path_switch_t *path_switch = NULL;
-	ba_path_switch_t *temp_switch = NULL;
-	int port_tar;
-	int target_port = 0;
-	int ports_to_try[2] = {3,5};
-	int *node_tar= curr_switch->ext_wire[0].node_tar;
-	int *node_src = curr_switch->ext_wire[0].node_tar;
-	int i;
-	int used=0;
-	int broke = 0;
-	ba_node_t *ba_node = NULL;
+/* static int _find_passthrough(ba_switch_t *curr_switch, int source_port,  */
+/* 			     List nodes, int dim, int count, int highest_phys_x)  */
+/* { */
+/* 	ba_switch_t *next_switch = NULL;  */
+/* 	ba_path_switch_t *path_add =  */
+/* 		(ba_path_switch_t *) xmalloc(sizeof(ba_path_switch_t)); */
+/* 	ba_path_switch_t *path_switch = NULL; */
+/* 	ba_path_switch_t *temp_switch = NULL; */
+/* 	int port_tar; */
+/* 	int target_port = 0; */
+/* 	int ports_to_try[2] = {3,5}; */
+/* 	int *node_tar= curr_switch->ext_wire[0].node_tar; */
+/* 	int *node_src = curr_switch->ext_wire[0].node_tar; */
+/* 	int i; */
+/* 	int used=0; */
+/* 	int broke = 0; */
+/* 	ba_node_t *ba_node = NULL; */
 	
-	ListIterator itr;
-	static bool found = false;
-
-	path_add->geometry[X] = node_src[X];
-#ifdef HAVE_BG
-	path_add->geometry[Y] = node_src[Y];
-	path_add->geometry[Z] = node_src[Z];
-#endif
-	path_add->dim = dim;
-	path_add->in = source_port;
+/* 	ListIterator itr; */
+/* 	static bool found = false; */
+
+/* 	path_add->geometry[X] = node_src[X]; */
+/* #ifdef HAVE_BG */
+/* 	path_add->geometry[Y] = node_src[Y]; */
+/* 	path_add->geometry[Z] = node_src[Z]; */
+/* #endif */
+/* 	path_add->dim = dim; */
+/* 	path_add->in = source_port; */
 	
-	if(count>=best_count) {
-		xfree(path_add);
-		return 0;
-	}
+/* 	if(count>=best_count) { */
+/* 		xfree(path_add); */
+/* 		return 0; */
+/* 	} */
 
-	itr = list_iterator_create(nodes);
-	while((ba_node = (ba_node_t*) list_next(itr))) {
+/* 	itr = list_iterator_create(nodes); */
+/* 	while((ba_node = (ba_node_t*) list_next(itr))) { */
 		
-#ifdef HAVE_BG
-		if(node_tar[X] == ba_node->coord[X] 
-		   && node_tar[Y] == ba_node->coord[Y] 
-		   && node_tar[Z] == ba_node->coord[Z]) {
-			broke = 1;
-			break;
-		}
-#else
-		if(node_tar[X] == ba_node->coord[X]) {
-			broke = 1;
-			break;
-		}
-#endif
+/* #ifdef HAVE_BG */
+/* 		if(node_tar[X] == ba_node->coord[X]  */
+/* 		   && node_tar[Y] == ba_node->coord[Y]  */
+/* 		   && node_tar[Z] == ba_node->coord[Z]) { */
+/* 			broke = 1; */
+/* 			break; */
+/* 		} */
+/* #else */
+/* 		if(node_tar[X] == ba_node->coord[X]) { */
+/* 			broke = 1; */
+/* 			break; */
+/* 		} */
+/* #endif */
 		
-	}
-	list_iterator_destroy(itr);
-	ba_node = &ba_system_ptr->
-		grid[node_tar[X]]
-#ifdef HAVE_BG
-		[node_tar[Y]]
-		[node_tar[Z]]
-#endif
-		;
-	if(!broke && count>0
-	   && !ba_node->used 
-	   && (ba_node->phys_x < highest_phys_x)) {
+/* 	} */
+/* 	list_iterator_destroy(itr); */
+/* 	ba_node = &ba_system_ptr-> */
+/* 		grid[node_tar[X]] */
+/* #ifdef HAVE_BG */
+/* 		[node_tar[Y]] */
+/* 		[node_tar[Z]] */
+/* #endif */
+/* 		; */
+/* 	if(!broke && count>0 */
+/* 	   && !ba_node->used  */
+/* 	   && (ba_node->phys_x < highest_phys_x)) { */
 		
-		debug3("this one not found %c%c%c",
-		       alpha_num[node_tar[X]],
-		       alpha_num[node_tar[Y]],
-		       alpha_num[node_tar[Z]]);
+/* 		debug3("this one not found %c%c%c", */
+/* 		       alpha_num[node_tar[X]], */
+/* 		       alpha_num[node_tar[Y]], */
+/* 		       alpha_num[node_tar[Z]]); */
 		
-		broke = 0;
+/* 		broke = 0; */
 				
-		if((source_port%2))
-			target_port=1;
+/* 		if((source_port%2)) */
+/* 			target_port=1; */
 		
-		list_destroy(best_path);
-		best_path = list_create(_delete_path_list);
-		found = true;
-		path_add->out = target_port;
-		list_push(path, path_add);
+/* 		list_destroy(best_path); */
+/* 		best_path = list_create(_delete_path_list); */
+/* 		found = true; */
+/* 		path_add->out = target_port; */
+/* 		list_push(path, path_add); */
 		
-		itr = list_iterator_create(path);
-		while((path_switch = (ba_path_switch_t*) list_next(itr))){
+/* 		itr = list_iterator_create(path); */
+/* 		while((path_switch = (ba_path_switch_t*) list_next(itr))){ */
 		
-			temp_switch = (ba_path_switch_t *) 
-				xmalloc(sizeof(ba_path_switch_t));
+/* 			temp_switch = (ba_path_switch_t *)  */
+/* 				xmalloc(sizeof(ba_path_switch_t)); */
 			 
-			temp_switch->geometry[X] = path_switch->geometry[X];
-#ifdef HAVE_BG
-			temp_switch->geometry[Y] = path_switch->geometry[Y];
-			temp_switch->geometry[Z] = path_switch->geometry[Z];
-#endif
-			temp_switch->dim = path_switch->dim;
-			temp_switch->in = path_switch->in;
-			temp_switch->out = path_switch->out;
-			list_append(best_path,temp_switch);
-		}
-		list_iterator_destroy(itr);
-		best_count = count;
-		return 1;
-	} 
-
-	if(source_port==0 || source_port==3 || source_port==5) {
-		if(count==0) {
-			ports_to_try[0] = 2;
-			ports_to_try[1] = 4;	
-		} else {
-			ports_to_try[0] = 4;
-			ports_to_try[1] = 2;	
-		}
-	}
+/* 			temp_switch->geometry[X] = path_switch->geometry[X]; */
+/* #ifdef HAVE_BG */
+/* 			temp_switch->geometry[Y] = path_switch->geometry[Y]; */
+/* 			temp_switch->geometry[Z] = path_switch->geometry[Z]; */
+/* #endif */
+/* 			temp_switch->dim = path_switch->dim; */
+/* 			temp_switch->in = path_switch->in; */
+/* 			temp_switch->out = path_switch->out; */
+/* 			list_append(best_path,temp_switch); */
+/* 		} */
+/* 		list_iterator_destroy(itr); */
+/* 		best_count = count; */
+/* 		return 1; */
+/* 	}  */
+
+/* 	if(source_port==0 || source_port==3 || source_port==5) { */
+/* 		if(count==0) { */
+/* 			ports_to_try[0] = 2; */
+/* 			ports_to_try[1] = 4;	 */
+/* 		} else { */
+/* 			ports_to_try[0] = 4; */
+/* 			ports_to_try[1] = 2;	 */
+/* 		} */
+/* 	} */
 			
-	for(i=0;i<2;i++) {
-		used=0;
-		if(!curr_switch->int_wire[ports_to_try[i]].used) {
-			itr = list_iterator_create(path);
-			while((path_switch = 
-			       (ba_path_switch_t*) list_next(itr))){
+/* 	for(i=0;i<2;i++) { */
+/* 		used=0; */
+/* 		if(!curr_switch->int_wire[ports_to_try[i]].used) { */
+/* 			itr = list_iterator_create(path); */
+/* 			while((path_switch =  */
+/* 			       (ba_path_switch_t*) list_next(itr))){ */
 				
-				if(((path_switch->geometry[X] == node_src[X]) 
-#ifdef HAVE_BG
-				    && (path_switch->geometry[Y] 
-					== node_src[Y])
-				    && (path_switch->geometry[Z] 
-					== node_tar[Z])
-#endif
-					   )) {
+/* 				if(((path_switch->geometry[X] == node_src[X])  */
+/* #ifdef HAVE_BG */
+/* 				    && (path_switch->geometry[Y]  */
+/* 					== node_src[Y]) */
+/* 				    && (path_switch->geometry[Z]  */
+/* 					== node_tar[Z]) */
+/* #endif */
+/* 					   )) { */
 					
-					if( path_switch->out
-					    == ports_to_try[i]) {
-						used = 1;
-						break;
-					}
-				}
-			}
-			list_iterator_destroy(itr);
+/* 					if( path_switch->out */
+/* 					    == ports_to_try[i]) { */
+/* 						used = 1; */
+/* 						break; */
+/* 					} */
+/* 				} */
+/* 			} */
+/* 			list_iterator_destroy(itr); */
 			
-			if(curr_switch->
-			   ext_wire[ports_to_try[i]].node_tar[X]
-			   == curr_switch->ext_wire[0].node_tar[X]  
-#ifdef HAVE_BG
-			   && curr_switch->
-			   ext_wire[ports_to_try[i]].node_tar[Y] 
-			   == curr_switch->ext_wire[0].node_tar[Y] 
-			   && curr_switch->
-			   ext_wire[ports_to_try[i]].node_tar[Z] 
-			   == curr_switch->ext_wire[0].node_tar[Z]
-#endif
-				) {
-				continue;
-			}
+/* 			if(curr_switch-> */
+/* 			   ext_wire[ports_to_try[i]].node_tar[X] */
+/* 			   == curr_switch->ext_wire[0].node_tar[X]   */
+/* #ifdef HAVE_BG */
+/* 			   && curr_switch-> */
+/* 			   ext_wire[ports_to_try[i]].node_tar[Y]  */
+/* 			   == curr_switch->ext_wire[0].node_tar[Y]  */
+/* 			   && curr_switch-> */
+/* 			   ext_wire[ports_to_try[i]].node_tar[Z]  */
+/* 			   == curr_switch->ext_wire[0].node_tar[Z] */
+/* #endif */
+/* 				) { */
+/* 				continue; */
+/* 			} */
 						
-			if(!used) {
-				port_tar = curr_switch->
-					ext_wire[ports_to_try[i]].port_tar;
-				node_tar = curr_switch->
-					ext_wire[ports_to_try[i]].node_tar;
+/* 			if(!used) { */
+/* 				port_tar = curr_switch-> */
+/* 					ext_wire[ports_to_try[i]].port_tar; */
+/* 				node_tar = curr_switch-> */
+/* 					ext_wire[ports_to_try[i]].node_tar; */
 				
-				next_switch = &ba_system_ptr->
-					grid[node_tar[X]]
-#ifdef HAVE_BG
-					[node_tar[Y]]
-					[node_tar[Z]]
-#endif
-					.axis_switch[X];
+/* 				next_switch = &ba_system_ptr-> */
+/* 					grid[node_tar[X]] */
+/* #ifdef HAVE_BG */
+/* 					[node_tar[Y]] */
+/* 					[node_tar[Z]] */
+/* #endif */
+/* 					.axis_switch[X]; */
 				
-				count++;
-				path_add->out = ports_to_try[i];
-				list_push(path, path_add);
-				debug3("looking at this one "
-				       "%c%c%c %d -> %c%c%c %d",
-				       alpha_num[ba_node->coord[X]],
-				       alpha_num[ba_node->coord[Y]],
-				       alpha_num[ba_node->coord[Z]],
-				       ports_to_try[i],
-				       alpha_num[node_tar[X]],
-				       alpha_num[node_tar[Y]],
-				       alpha_num[node_tar[Z]],
-				       port_tar);
+/* 				count++; */
+/* 				path_add->out = ports_to_try[i]; */
+/* 				list_push(path, path_add); */
+/* 				debug3("looking at this one " */
+/* 				       "%c%c%c %d -> %c%c%c %d", */
+/* 				       alpha_num[ba_node->coord[X]], */
+/* 				       alpha_num[ba_node->coord[Y]], */
+/* 				       alpha_num[ba_node->coord[Z]], */
+/* 				       ports_to_try[i], */
+/* 				       alpha_num[node_tar[X]], */
+/* 				       alpha_num[node_tar[Y]], */
+/* 				       alpha_num[node_tar[Z]], */
+/* 				       port_tar); */
 		
-				_find_passthrough(next_switch, port_tar, nodes,
-						dim, count, highest_phys_x);
-				while((temp_switch = list_pop(path)) 
-				      != path_add){
-					xfree(temp_switch);
-					debug3("something here 2");
-				}
-			}
-		}
-	}
-	xfree(path_add);
-	return 0;
-}
+/* 				_find_passthrough(next_switch, port_tar, nodes, */
+/* 						dim, count, highest_phys_x); */
+/* 				while((temp_switch = list_pop(path))  */
+/* 				      != path_add){ */
+/* 					xfree(temp_switch); */
+/* 					debug3("something here 2"); */
+/* 				} */
+/* 			} */
+/* 		} */
+/* 	} */
+/* 	xfree(path_add); */
+/* 	return 0; */
+/* } */
 
-static int _finish_torus(ba_switch_t *curr_switch, int source_port, 
-			 List nodes, int dim, int count, int *start) 
+static int _finish_torus(ba_switch_t *curr_switch, int source_port,
+			 List nodes, int dim, int count, int *start)
 {
-	ba_switch_t *next_switch = NULL; 
-	ba_path_switch_t *path_add = 
+	ba_switch_t *next_switch = NULL;
+	ba_path_switch_t *path_add =
 		(ba_path_switch_t *) xmalloc(sizeof(ba_path_switch_t));
 	ba_path_switch_t *path_switch = NULL;
 	ba_path_switch_t *temp_switch = NULL;
@@ -4656,9 +4668,9 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 		xfree(path_add);
 		return 0;
 	}
-	if(node_tar[X] == start[X] 
+	if(node_tar[X] == start[X]
 #ifdef HAVE_BG
-	    && node_tar[Y] == start[Y] 
+	    && node_tar[Y] == start[Y]
 	    && node_tar[Z] == start[Z]
 #endif
 		) {
@@ -4674,18 +4686,18 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 			list_push(path, path_add);
 			
 			itr = list_iterator_create(path);
-			while((path_switch = 
+			while((path_switch =
 			       (ba_path_switch_t*) list_next(itr))){
 				
-				temp_switch = (ba_path_switch_t *) 
+				temp_switch = (ba_path_switch_t *)
 					xmalloc(sizeof(ba_path_switch_t));
 				
-				temp_switch->geometry[X] = 
+				temp_switch->geometry[X] =
 					path_switch->geometry[X];
 #ifdef HAVE_BG
-				temp_switch->geometry[Y] = 
+				temp_switch->geometry[Y] =
 					path_switch->geometry[Y];
-				temp_switch->geometry[Z] = 
+				temp_switch->geometry[Z] =
 					path_switch->geometry[Z];
 #endif
 				temp_switch->dim = path_switch->dim;
@@ -4696,26 +4708,26 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 			list_iterator_destroy(itr);
 			best_count = count;
 			return 1;
-		} 
+		}
 	}
 	
 	if(source_port==0 || source_port==3 || source_port==5) {
 		ports_to_try[0] = 4;
-		ports_to_try[1] = 2;		
+		ports_to_try[1] = 2;
 	}
 	
 	for(i=0;i<2;i++) {
 		used=0;
 		if(!curr_switch->int_wire[ports_to_try[i]].used) {
 			itr = list_iterator_create(path);
-			while((path_switch = 
+			while((path_switch =
 			       (ba_path_switch_t*) list_next(itr))){
 				
-				if(((path_switch->geometry[X] == node_src[X]) 
+				if(((path_switch->geometry[X] == node_src[X])
 #ifdef HAVE_BG
-				    && (path_switch->geometry[Y] 
+				    && (path_switch->geometry[Y]
 					== node_src[Y])
-				    && (path_switch->geometry[Z] 
+				    && (path_switch->geometry[Z]
 					== node_tar[Z])
 #endif
 					)) {
@@ -4728,13 +4740,13 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 			}
 			list_iterator_destroy(itr);
 			if((curr_switch->
-			    ext_wire[ports_to_try[i]].node_tar[X] == 
-			    curr_switch->ext_wire[0].node_tar[X] && 
+			    ext_wire[ports_to_try[i]].node_tar[X] ==
+			    curr_switch->ext_wire[0].node_tar[X] &&
 			    curr_switch->
-			    ext_wire[ports_to_try[i]].node_tar[Y] == 
-			    curr_switch->ext_wire[0].node_tar[Y] && 
+			    ext_wire[ports_to_try[i]].node_tar[Y] ==
+			    curr_switch->ext_wire[0].node_tar[Y] &&
 			    curr_switch->
-			    ext_wire[ports_to_try[i]].node_tar[Z] == 
+			    ext_wire[ports_to_try[i]].node_tar[Z] ==
 			    curr_switch->ext_wire[0].node_tar[Z])) {
 				continue;
 			}
@@ -4758,11 +4770,11 @@ static int _finish_torus(ba_switch_t *curr_switch, int source_port,
 				list_push(path, path_add);
 				_finish_torus(next_switch, port_tar, nodes,
 						dim, count, start);
-				while((temp_switch = list_pop(path)) 
+				while((temp_switch = list_pop(path))
 				      != path_add){
 					xfree(temp_switch);
 					debug3("something here 3");
-				} 
+				}
 			}
 		}
        }
@@ -4852,6 +4864,7 @@ int main(int argc, char** argv)
 	ba_request_t *request = (ba_request_t*) xmalloc(sizeof(ba_request_t)); 
 	log_options_t log_opts = LOG_OPTS_INITIALIZER;
 	int debug_level = 6;
+	node_info_msg_t *new_node_ptr = NULL;
 
 	List results;
 //	List results2;
@@ -4866,7 +4879,12 @@ int main(int argc, char** argv)
 	DIM_SIZE[X]=0;
 	DIM_SIZE[Y]=0;
 	DIM_SIZE[Z]=0;
-	ba_init(NULL);
+	while (slurm_load_node((time_t) NULL, &new_node_ptr, SHOW_ALL)) { 
+		
+		sleep(10);	/* keep trying to reconnect */
+	}
+	
+	ba_init(new_node_ptr);
 	init_wires(NULL);
 						
 	/* results = list_create(NULL); */
@@ -4914,11 +4932,15 @@ int main(int argc, char** argv)
 	list_destroy(results);
 
 	results = list_create(NULL);
-	request->geometry[0] = 1;
+	request->geometry[0] = 4;
 	request->geometry[1] = 1;
 	request->geometry[2] = 1;
-	request->start_req = 0;
-	request->size = 1;
+	request->start[0] = 1;
+	request->start[1] = 0;
+	request->start[2] = 0;
+	request->start_req = 1;
+	request->rotate = 0;
+	request->elongate = 0;
 	request->conn_type = SELECT_TORUS;
 	new_ba_request(request);
 	print_ba_request(request);
@@ -4959,7 +4981,6 @@ int main(int argc, char** argv)
 /* 		       request->geometry[1], */
 /* 		       request->geometry[2]); */
 /* 	} */
-
 	
 	int dim,j;
 	int x,y,z;
diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.h b/src/plugins/select/bluegene/block_allocator/block_allocator.h
index 4b87ec198..cc69f7d35 100644
--- a/src/plugins/select/bluegene/block_allocator/block_allocator.h
+++ b/src/plugins/select/bluegene/block_allocator/block_allocator.h
@@ -182,7 +182,7 @@ typedef struct {
 	int color;
 	int index;
 	int state;
-	int phys_x;	
+//	int phys_x;	// no longer needed 
 } ba_node_t;
 
 typedef struct {
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 416ee1366..923348d84 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -2,7 +2,7 @@
  *  select_cons_res.c - node selection plugin supporting consumable 
  *  resources policies.
  *
- *  $Id: select_cons_res.c 12189 2007-08-31 17:57:29Z jette $
+ *  $Id: select_cons_res.c 12266 2007-09-11 20:34:43Z jette $
  *****************************************************************************\
  *
  *  The following example below illustrates how four jobs are allocated
@@ -2099,7 +2099,6 @@ extern int select_p_job_suspend(struct job_record *job_ptr)
 			default:
 				break;
 			}
-			break;
 		}
 		rc = SLURM_SUCCESS;
 		break;
@@ -2179,7 +2178,6 @@ extern int select_p_job_resume(struct job_record *job_ptr)
 			default:
 				break;
 			}
-			break;
 		}
 		rc = SLURM_SUCCESS;
 		break;
diff --git a/src/salloc/opt.c b/src/salloc/opt.c
index 245fc959d..82e371151 100644
--- a/src/salloc/opt.c
+++ b/src/salloc/opt.c
@@ -85,6 +85,7 @@
 #define OPT_NO_BELL     0x10
 #define OPT_JOBID       0x11
 #define OPT_EXCLUSIVE   0x12
+#define OPT_OVERCOMMIT  0x13
 
 /* generic getopt_long flags, integers and *not* valid characters */
 #define LONG_OPT_JOBID       0x105
@@ -441,6 +442,7 @@ static void _opt_default()
 	opt.kill_command_signal_set = false;
 
 	opt.immediate	= false;
+	opt.overcommit	= false;
 	opt.max_wait	= 0;
 
 	opt.quiet = 0;
@@ -504,6 +506,7 @@ env_vars_t env_vars[] = {
   {"SALLOC_BELL",          OPT_BELL,       NULL,               NULL           },
   {"SALLOC_NO_BELL",       OPT_NO_BELL,    NULL,               NULL           },
   {"SALLOC_EXCLUSIVE",     OPT_EXCLUSIVE,  NULL,               NULL           },
+  {"SALLOC_OVERCOMMIT",    OPT_OVERCOMMIT, NULL,               NULL           },
   {NULL, 0, NULL, NULL}
 };
 
@@ -612,6 +615,9 @@ _process_env_var(env_vars_t *e, const char *val)
 	case OPT_EXCLUSIVE:
 		opt.shared = 0;
 		break;
+	case OPT_OVERCOMMIT:
+		opt.overcommit = true;
+		break;
 	default:
 		/* do nothing */
 		break;
@@ -660,6 +666,7 @@ void set_options(const int argc, char **argv)
 		{"kill-command",  optional_argument, 0, 'K'},
 		{"tasks",         required_argument, 0, 'n'},
 		{"nodes",         required_argument, 0, 'N'},
+		{"overcommit",    no_argument,       0, 'O'},
 		{"partition",     required_argument, 0, 'p'},
 		{"quiet",         no_argument,       0, 'q'},
 		{"no-rotate",     no_argument,       0, 'R'},
@@ -694,7 +701,7 @@ void set_options(const int argc, char **argv)
 		{"reboot",	  no_argument,       0, LONG_OPT_REBOOT},
 		{NULL,            0,                 0, 0}
 	};
-	char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:p:qR:st:uU:vVw:W:x:";
+	char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:Op:qR:st:uU:vVw:W:x:";
 
 	opt.progname = xbasename(argv[0]);
 	optind = 0;		
@@ -772,6 +779,9 @@ void set_options(const int argc, char **argv)
 				exit(1);
 			}
 			break;
+		case 'O':
+			opt.overcommit = true;
+			break;
 		case 'p':
 			xfree(opt.partition);
 			opt.partition = xstrdup(optarg);
@@ -1247,6 +1257,7 @@ static void _opt_list()
 		info("jobid          : %u", opt.jobid);
 	info("verbose        : %d", opt.verbose);
 	info("immediate      : %s", tf_(opt.immediate));
+	info("overcommit     : %s", tf_(opt.overcommit));
 	if (opt.time_limit == INFINITE)
 		info("time_limit     : INFINITE");
 	else if (opt.time_limit != NO_VAL)
@@ -1288,7 +1299,7 @@ static void _usage(void)
  	printf(
 "Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n"
 "              [[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n"
-"              [--immediate] [--no-kill]\n"
+"              [--immediate] [--no-kill] [--overcommit]\n"
 "              [--share] [-J jobname] [--jobid=id]\n"
 "              [--verbose] [--gid=group] [--uid=user]\n"
 "              [-W sec] [--minsockets=n] [--mincores=n] [--minthreads=n]\n"
@@ -1318,6 +1329,7 @@ static void _help(void)
 "  -I, --immediate             exit if resources are not immediately available\n"
 "  -k, --no-kill               do not kill job on node failure\n"
 "  -K, --kill-command[=signal] signal to send terminating job\n"
+"  -O, --overcommit            overcommit resources\n"
 "  -s, --share                 share nodes with other jobs\n"
 "  -J, --job-name=jobname      name of job\n"
 "      --jobid=id              specify jobid to use\n"
diff --git a/src/salloc/opt.h b/src/salloc/opt.h
index 3a4d2c122..b294c0363 100644
--- a/src/salloc/opt.h
+++ b/src/salloc/opt.h
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  opt.h - definitions for salloc option processing
- *  $Id: opt.h 11992 2007-08-10 18:59:15Z jette $
+ *  $Id: opt.h 12196 2007-08-31 21:28:28Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -78,6 +78,7 @@ typedef struct salloc_options {
 
 	bool hold;		/* --hold, -H			*/
 	bool no_kill;		/* --no-kill, -k		*/
+	bool overcommit;	/* --overcommit -O		*/
 	int kill_command_signal;/* --kill-command, -K           */
 	bool kill_command_signal_set;
 	uint16_t shared;	/* --share,   -s		*/
diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c
index 3a0b60e8d..eb881a1a4 100644
--- a/src/salloc/salloc.c
+++ b/src/salloc/salloc.c
@@ -2,7 +2,7 @@
  *  salloc.c - Request a SLURM job allocation and
  *             launch a user-specified command.
  *
- *  $Id: salloc.c 12187 2007-08-31 16:07:57Z jette $
+ *  $Id: salloc.c 12196 2007-08-31 21:28:28Z jette $
  *****************************************************************************
  *  Copyright (C) 2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -165,6 +165,10 @@ int main(int argc, char *argv[])
 	/* Add default task count for srun, if not already set */
 	if (opt.nprocs_set)
 		env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.nprocs);
+	if (opt.overcommit) {
+		env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d", 
+			opt.overcommit);
+	}
 	env_array_set_environment(env);
 	env_array_free(env);
 	pthread_mutex_lock(&allocation_state_lock);
@@ -285,7 +289,11 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc)
 		desc->job_min_memory = opt.realmem;
 	if (opt.tmpdisk > -1)
 		desc->job_min_tmp_disk = opt.tmpdisk;
-	desc->num_procs = opt.nprocs * opt.cpus_per_task;
+	if (opt.overcommit) {
+		desc->num_procs = opt.min_nodes;
+		desc->overcommit = opt.overcommit;
+	} else
+		desc->num_procs = opt.nprocs * opt.cpus_per_task;
 	if (opt.nprocs_set)
 		desc->num_tasks = opt.nprocs;
 	if (opt.cpus_set)
diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c
index 26f280aeb..5a933616f 100644
--- a/src/slurmctld/agent.c
+++ b/src/slurmctld/agent.c
@@ -2,7 +2,7 @@
  *  agent.c - parallel background communication functions. This is where  
  *	logic could be placed for broadcast communications.
  *
- *  $Id: agent.c 12088 2007-08-22 18:02:24Z jette $
+ *  $Id: agent.c 12370 2007-09-20 19:18:17Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -871,9 +871,10 @@ static void *_thread_per_group_rpc(void *args)
 				run_scheduler = true;
 			unlock_slurmctld(job_write_lock);
 		}
-			
-		/* SPECIAL CASE: Kill non-startable batch job */
-		if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) && rc &&
+		/* SPECIAL CASE: Kill non-startable batch job, 
+		 * Requeue the job on ESLURMD_PROLOG_FAILED */
+		if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) && 
+		    (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) &&
 		    (ret_data_info->type != RESPONSE_FORWARD_FAILED)) {
 			batch_job_launch_msg_t *launch_msg_ptr = 
 				task_ptr->msg_args_ptr;
@@ -890,9 +891,9 @@ static void *_thread_per_group_rpc(void *args)
 #endif
 		
 		
-		if (((msg_type == REQUEST_SIGNAL_TASKS) 
-		     ||   (msg_type == REQUEST_TERMINATE_TASKS)) 
-		    && (rc == ESRCH)) {
+		if (((msg_type == REQUEST_SIGNAL_TASKS) || 
+		     (msg_type == REQUEST_TERMINATE_TASKS)) && 
+		     (rc == ESRCH)) {
 			/* process is already dead, not a real error */
 			rc = SLURM_SUCCESS;
 		}
@@ -1190,7 +1191,8 @@ void agent_queue_request(agent_arg_t *agent_arg_ptr)
 {
 	queued_request_t *queued_req_ptr = NULL;
 
-	if (agent_cnt < MAX_AGENT_CNT) {	/* execute now */
+	if ((agent_cnt < MAX_AGENT_CNT) ||		/* execute now */
+	    (agent_arg_ptr->msg_type == REQUEST_SHUTDOWN)) {
 		pthread_attr_t attr_agent;
 		pthread_t thread_agent;
 		int rc;
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index a634d4b06..4f929aa63 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3,7 +3,7 @@
  *	Note: there is a global job list (job_list), time stamp 
  *	(last_job_update), and hash table (job_hash)
  *
- *  $Id: job_mgr.c 12042 2007-08-16 22:39:37Z jette $
+ *  $Id: job_mgr.c 12339 2007-09-17 19:25:19Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1014,6 +1014,7 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 	struct node_record *node_ptr;
 	int bit_position;
 	int job_count = 0;
+	time_t now = time(NULL);
 
 	node_ptr = find_node_record(node_name);
 	if (node_ptr == NULL)	/* No such node */
@@ -1053,10 +1054,28 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 				continue;
 
 			job_count++;
-			srun_node_fail(job_ptr->job_id, node_name);
-			if ((job_ptr->details == NULL) ||
-			    (job_ptr->kill_on_node_fail) ||
-			    (job_ptr->node_cnt <= 1)) {
+			if ((job_ptr->details) &&
+			    (job_ptr->kill_on_node_fail == 0) &&
+			    (job_ptr->node_cnt > 1)) {
+				/* keep job running on remaining nodes */
+				srun_node_fail(job_ptr->job_id, node_name);
+				error("Removing failed node %s from job_id %u",
+				      node_name, job_ptr->job_id);
+				_excise_node_from_job(job_ptr, node_ptr);
+			} else if (job_ptr->batch_flag && job_ptr->details &&
+			           (job_ptr->details->no_requeue == 0)) {
+				info("requeue job %u due to failure of node %s",
+				     job_ptr->job_id, node_name);
+				_set_job_prio(job_ptr);
+				job_ptr->time_last_active  = now;
+				job_ptr->job_state = JOB_PENDING | JOB_COMPLETING;
+				if (suspended)
+					job_ptr->end_time = job_ptr->suspend_time;
+				else
+					job_ptr->end_time = now;
+				deallocate_nodes(job_ptr, false, suspended);
+				job_completion_logger(job_ptr);
+			} else {
 				info("Killing job_id %u on failed node %s",
 				     job_ptr->job_id, node_name);
 				job_ptr->job_state = JOB_NODE_FAIL | 
@@ -1071,17 +1090,13 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test)
 					job_ptr->end_time = time(NULL);
 				job_completion_logger(job_ptr);
 				deallocate_nodes(job_ptr, false, suspended);
-			} else {
-				error("Removing failed node %s from job_id %u",
-				      node_name, job_ptr->job_id);
-				_excise_node_from_job(job_ptr, node_ptr);
 			}
 		}
 
 	}
 	list_iterator_destroy(job_iterator);
 	if (job_count)
-		last_job_update = time(NULL);
+		last_job_update = now;
 
 	return job_count;
 }
@@ -1661,12 +1676,12 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue,
 		if (job_return_code == NO_VAL) {
 			job_ptr->job_state = JOB_CANCELLED| job_comp_flag;
 			job_ptr->requid = uid;
-		} else if (WEXITSTATUS(job_return_code)) {
+		} else if (WIFEXITED(job_return_code) &&
+		           WEXITSTATUS(job_return_code)) {
 			job_ptr->job_state = JOB_FAILED   | job_comp_flag;
 			job_ptr->exit_code = job_return_code;
 			job_ptr->state_reason = FAIL_EXIT_CODE;
-		}
-		else if (job_comp_flag &&		/* job was running */
+		} else if (job_comp_flag &&		/* job was running */
 			 (job_ptr->end_time < now)) {	/* over time limit */
 			job_ptr->job_state = JOB_TIMEOUT  | job_comp_flag;
 			job_ptr->exit_code = MAX(job_ptr->exit_code, 1);
diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c
index f0a87fbf4..5faad890f 100644
--- a/src/slurmd/slurmstepd/mgr.c
+++ b/src/slurmd/slurmstepd/mgr.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd
- *  $Id: mgr.c 11856 2007-07-19 02:36:24Z morrone $
+ *  $Id: mgr.c 12244 2007-09-10 21:03:20Z da $
  *****************************************************************************
  *  Copyright (C) 2002-2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -951,8 +951,6 @@ _fork_all_tasks(slurmd_job_t *job)
 		error ("Unable to return to working directory");
 	}
 
-	jobacct_g_set_proctrack_container_id(job->cont_id);
-
 	for (i = 0; i < job->ntasks; i++) {
 		/*
                  * Put this task in the step process group
@@ -975,6 +973,7 @@ _fork_all_tasks(slurmd_job_t *job)
 			return SLURM_ERROR;
 		}
 	}
+	jobacct_g_set_proctrack_container_id(job->cont_id);
 
 	/*
 	 * Now it's ok to unblock the tasks, so they may call exec.
@@ -1685,6 +1684,14 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job,
 
 	debug("[job %u] attempting to run %s [%s]", job->jobid, name, path);
 
+	if (access(path, R_OK | X_OK) < 0) {
+		error("Could not run %s [%s]: %m", name, path);
+		return -1;
+	}
+
+	if (slurm_container_create(job) != SLURM_SUCCESS)
+		error("slurm_container_create: %m");
+
 	if ((cpid = fork()) < 0) {
 		error ("executing %s: fork: %m", name);
 		return -1;
@@ -1707,13 +1714,16 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job,
 			/* child process, should not return */
 			exit(127);
 		}
-	
+
+		chdir(job->cwd);
 		setpgrp();
 		execve(path, argv, env);
 		error("execve(): %m");
 		exit(127);
 	}
 
+	if (slurm_container_add(job, cpid) != SLURM_SUCCESS)
+		error("slurm_container_add: %m");
 	if (max_wait < 0)
 		opt = 0;
 	else
@@ -1725,7 +1735,8 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job,
 			if (errno == EINTR)
 				continue;
 			error("waidpid: %m");
-			return 0;
+			status = 0;
+			break;
 		} else if (rc == 0) {
 			sleep(1);
 			if ((--max_wait) == 0) {
@@ -1733,10 +1744,13 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job,
 				opt = 0;
 			}
 		} else  {
-			killpg(cpid, SIGKILL);	/* kill children too */
-			return status;
+			/* spawned process exited */
+			break;
 		}
 	}
-
-	/* NOTREACHED */
+	/* Insure that all child processes get killed */
+	killpg(cpid, SIGKILL);
+	slurm_container_signal(job->cont_id, SIGKILL);
+	
+	return status;
 }
diff --git a/src/slurmd/slurmstepd/task.c b/src/slurmd/slurmstepd/task.c
index b71a4d05a..7bc5c7285 100644
--- a/src/slurmd/slurmstepd/task.c
+++ b/src/slurmd/slurmstepd/task.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  slurmd/slurmstepd/task.c - task launching functions for slurmstepd
- *  $Id: task.c 11776 2007-07-02 20:53:02Z jette $
+ *  $Id: task.c 12201 2007-08-31 22:34:26Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -152,8 +152,8 @@ _run_script_and_set_env(const char *name, const char *path, slurmd_job_t *job)
 	debug("[job %u] attempting to run %s [%s]", job->jobid, name, path);
 
 	if (access(path, R_OK | X_OK) < 0) {
-		debug("Not running %s [%s]: %m", name, path);
-		return 0;
+		error("Could not run %s [%s]: %m", name, path);
+		return -1;
 	}
 	if (pipe(pfd) < 0) {
 		error("executing %s: pipe: %m", name);
diff --git a/src/squeue/print.h b/src/squeue/print.h
index d062c21a3..0834283e6 100644
--- a/src/squeue/print.h
+++ b/src/squeue/print.h
@@ -86,9 +86,9 @@ int job_format_add_function(List list, int width, bool right_justify,
 #define job_format_add_prefix(list,wid,right,prefix) \
 	job_format_add_function(list,0,0,prefix,_print_job_prefix)
 #define job_format_add_reason(list,wid,right,prefix) \
-        job_format_add_function(list,0,0,prefix,_print_job_reason)
+        job_format_add_function(list,wid,right,prefix,_print_job_reason)
 #define job_format_add_reason_list(list,wid,right,prefix) \
-	job_format_add_function(list,0,0,prefix,_print_job_reason_list)
+	job_format_add_function(list,wid,right,prefix,_print_job_reason_list)
 #define job_format_add_name(list,wid,right,suffix) \
 	job_format_add_function(list,wid,right,suffix,_print_job_name)
 #define job_format_add_user_name(list,wid,right,suffix) \
diff --git a/src/srun/opt.c b/src/srun/opt.c
index d6b165799..69fdc4df4 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -1,6 +1,6 @@
 /*****************************************************************************\
  *  opt.c - options processing for srun
- *  $Id: opt.c 12187 2007-08-31 16:07:57Z jette $
+ *  $Id: opt.c 12315 2007-09-13 23:56:02Z jette $
  *****************************************************************************
  *  Copyright (C) 2002-2006 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -1098,6 +1098,7 @@ env_vars_t env_vars[] = {
 {"SLURM_STDERRMODE",    OPT_STRING,     &opt.efname,        NULL             },
 {"SLURM_STDINMODE",     OPT_STRING,     &opt.ifname,        NULL             },
 {"SLURM_STDOUTMODE",    OPT_STRING,     &opt.ofname,        NULL             },
+{"SLURM_THREADS",       OPT_INT,        &opt.max_threads,   NULL             },
 {"SLURM_TIMELIMIT",     OPT_STRING,     &opt.time_limit_str,NULL             },
 {"SLURM_WAIT",          OPT_INT,        &opt.max_wait,      NULL             },
 {"SLURM_DISABLE_STATUS",OPT_INT,        &opt.disable_status,NULL             },
@@ -2357,8 +2358,15 @@ static bool _opt_verify(void)
 			}
 		} else if (opt.nodes_set && opt.nprocs_set) {
 
+			/*
+			 * Make sure in a non allocate situation that
+			 * the number of max_nodes is <= number of tasks
+			 */
+			if (!opt.allocate && opt.nprocs < opt.max_nodes) 
+				opt.max_nodes = opt.nprocs;
+
 			/* 
-			 *  make sure # of procs >= min_nodes 
+			 *  make sure # of procs >= min_nodes || max_nodes 
 			 */
 			if (opt.nprocs < opt.min_nodes) {
 
diff --git a/src/srun/srun.c b/src/srun/srun.c
index 9353267e2..efdfbb401 100644
--- a/src/srun/srun.c
+++ b/src/srun/srun.c
@@ -250,6 +250,14 @@ int srun(int ac, char **av)
 		if (msg_thr_create(job) < 0)
 			job_fatal(job, "Unable to create msg thread");
 		exitcode = _run_job_script(job, env);
+
+		/* close up the msg thread cleanly */
+		close(job->forked_msg->msg_par->msg_pipe[1]);
+		debug2("Waiting for message thread");
+		if (pthread_join(job->jtid, NULL) < 0)
+			error ("Waiting on message thread: %m");
+		debug2("done");
+
 		srun_job_destroy(job,exitcode);
 
 		debug ("Spawned srun shell terminated");
diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am
index 490524f99..2d4cd215b 100644
--- a/testsuite/expect/Makefile.am
+++ b/testsuite/expect/Makefile.am
@@ -230,6 +230,7 @@ EXTRA_DIST = \
 	test15.21			\
 	test15.22			\
 	test15.23			\
+	test15.24			\
 	test16.1			\
 	test16.2			\
 	test16.3			\
diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in
index e2dd00284..3752b67bc 100644
--- a/testsuite/expect/Makefile.in
+++ b/testsuite/expect/Makefile.in
@@ -449,6 +449,7 @@ EXTRA_DIST = \
 	test15.21			\
 	test15.22			\
 	test15.23			\
+	test15.24			\
 	test16.1			\
 	test16.2			\
 	test16.3			\
diff --git a/testsuite/expect/README b/testsuite/expect/README
index 5ba47c64e..adce31709 100644
--- a/testsuite/expect/README
+++ b/testsuite/expect/README
@@ -1,5 +1,5 @@
 ############################################################################
-# Copyright (C) 2002-2006 The Regents of the University of California.
+# Copyright (C) 2002-2007 The Regents of the University of California.
 # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 # Written by Morris Jette <jette1@llnl.gov>
 # UCRL-CODE-226842.
@@ -22,34 +22,58 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
 ############################################################################
 
-This directory contains a battery of SLURM regression tests. The tests make use 
-of the "expect" scripting language. You can create "globals.local" and 
-identify locations of files to be used in testing, especially the 
-variable "slurm_dir". These tests expect single node jobs submitted to the 
-default partition to respond within a one minutes. 
-If that is not the case, modify the value of "max_job_delay" in the 
-"globals.local" file to an appropriate value or the tests will report 
-failures due to timeouts. 
-If there are file propagation delays (e.g. due to NFS), the value of 
-"max_file_delay" in the "globals.local" file may need modification.
+This directory contains a battery of SLURM regression tests. The tests make
+use of the "expect" scripting language. You can create "globals.local" and 
+identify locations of files to be used in testing, especially the variable 
+"slurm_dir". These tests expect single node jobs submitted to the default 
+partition to respond within a 120 seconds.  If that is not the case, modify 
+the value of "max_job_delay" in the "globals.local" file to an appropriate 
+value or the tests will report failures due to timeouts.  If there are file 
+propagation delays (e.g. due to NFS), the value of "max_file_delay" in the 
+"globals.local" file may need modification. For example:
+    $ cat globals.local
+    set slurm_dir     "/usr/local"
+    set max_job_delay 300
 
 Each test can be executed independently. Upon successful completion, the test 
 will print "SUCCESS" and terminate with an exit code of zero. Upon failure, 
 the test will typically print "FAILURE" and an explanation of the failure. 
 The message "WARNING" indicates that the cluster configuration can not fully 
-test some option (e.g. only one node or partition) and the test terminates with 
-an exit code of zero. In the event of a configuration problem or other 
+test some option (e.g. only one node or partition) and the test terminates 
+with an exit code of zero. In the event of a configuration problem or other 
 catastrophic test failure other messages could be printed and their cause 
 should be investigated. In either case, failing tests will terminate with a 
 non-zero exit code and successful tests will terminate with a zero exit code.
 
-The script "regression" will execute all of the tests and summarize the results.
-This script may be modified to test only selected commands (e.g. sinfo command 
-tests have the prefix "test4.*"). A summary of each test is shown below.
-
-There are also scripts to emulate some commands on systems lacking them (e.g. 
-AIX). These include pkill and usleep.
-
+The script "regression" will execute all of the tests and summarize the 
+results. Standard output contains detailed logging of all events, which is 
+quite verbose. Failure information is written to standard error. A good 
+way to run "regression" is to write its standard output to one file and 
+either write standard error to another file or print it to the terminal.
+Execution time of the full test suite is roughly 80 minutes, but can vary
+considerably with the architecuture, configuration, and system load. Some 
+tests send e-mail, so check for four e-mail messages set the the user 
+running the tests. Here is an example. 
+    $ ./regression >slurm.test.tux.Aug3
+    Completions:357
+    Failures:   0
+    Time (sec): 4375
+    Remember to check for mail send by tests
+
+When failures do occur, check the standard output for details. Searching 
+for the keyword "FAILURE" will typically locate the failing test. Note 
+that some of the tests are architecture or configuration specific.  Also 
+note that most tests are designed to be run as a normal user. Tests 3.# 
+are designed to be run as user root or SlurmUser, but will be skipped 
+when the full test suite is executed as an unpriviledged user. The full 
+test suite is typically executed many times by the SLURM developers on a 
+variety of systems before a SLURM release is made. This has resulted in
+high system reliability. When SLURM bugs are found or features added, 
+this test suite is expanded.
+
+A summary of each test is shown below. There are also scripts to emulate 
+some commands on systems lacking them (e.g.  AIX). These include pkill and 
+usleep.
 ############################################################################
 
 
@@ -389,6 +413,7 @@ test15.22  Test of partition specification on job submission (--partition
            option).
 test15.23  Test of environment variables that control salloc actions: 
            SALLOC_ACCOUNT, SALLOC_DEBUG and SALLOC_TIMELIMIT
+test15.24  Test of --overcommit option.
 
 
 test16.#   Testing of sattach options.
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index c7414e697..4d4c7f51f 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -350,6 +350,12 @@ proc wait_for_file { file_name } {
 	for {set my_delay 0} {$my_delay <= $max_file_delay} {incr my_delay} {
 		if [file exists $file_name] {
 #			Add small delay for I/O buffering
+			for {} {$my_delay <= $max_file_delay} {incr my_delay} {
+				if {[file size $file_name] != 0} {
+					break
+				}
+				exec $bin_sleep 1
+			}
 			exec $bin_sleep 2
 			return 0
 		}
diff --git a/testsuite/expect/regression b/testsuite/expect/regression
index f2ae34c94..4a386b89c 100755
--- a/testsuite/expect/regression
+++ b/testsuite/expect/regression
@@ -56,7 +56,7 @@ for major in `seq 1 20`; do
 		TEST=test${major}.${minor}
 		if [ ! -f $TEST ]; then continue; fi
 
-		expect $TEST
+		$TEST
 		if [ $? -eq 0 ]
 		then
 			COMPLETIONS=$((COMPLETIONS+1))
@@ -81,6 +81,10 @@ echo ""
 echo "Completions:$COMPLETIONS"
 echo "Failures:   $FAILURES"
 echo "Time (sec): $DELTA_TIME"
+
+echo "Completions:$COMPLETIONS" >& 2
+echo "Failures:   $FAILURES"    >& 2
+echo "Time (sec): $DELTA_TIME"  >& 2
 echo "Remember to check for mail send by tests" >& 2
 
 if [ $FAILURES -eq 0 ]
diff --git a/testsuite/expect/test1.52 b/testsuite/expect/test1.52
index bed79db7e..e67eb9cfc 100755
--- a/testsuite/expect/test1.52
+++ b/testsuite/expect/test1.52
@@ -38,7 +38,6 @@ set num_tasks   2
 set node_count  0
 set max_nodes   0
 set task_count  0
-set job_id      0
 set hostfile    "test$test_id.hostfile"
 
 print_header $test_id
@@ -115,10 +114,10 @@ for {set i 0} {$i<3} {incr i} {
 	#
 	# execute srun with a specific node count
 	#
-	set job_id 0
 	set node0  ""
 	set node1  ""
-	set srun_pid [spawn $srun -N3 -l $bin_printenv SLURMD_NODENAME]
+	set node2  ""
+	set srun_pid [spawn $srun -N3 -t1 -l $bin_printenv SLURMD_NODENAME]
 	expect {
 		-re "SwitchType does not permit arbitrary task distribution" {
 			set no_hostfile 1
@@ -135,10 +134,6 @@ for {set i 0} {$i<3} {incr i} {
 			}
 			exp_continue
 		}
-		-re "slurm job ($number)" {
-			set job_id $expect_out(1,string)
-			exp_continue
-		}
 		timeout {
 			send_user "\nFAILURE: srun not responding\n"
 			slow_kill $srun_pid
diff --git a/testsuite/expect/test1.88 b/testsuite/expect/test1.88
index 2450d925f..b23295b91 100755
--- a/testsuite/expect/test1.88
+++ b/testsuite/expect/test1.88
@@ -57,6 +57,10 @@ if {[test_front_end] != 0} {
         send_user "\nWARNING: This test is incompatable with front-end systems\n"
         exit 0
 }
+if {[test_aix] == 1} {
+	send_user "WARNING: Test is incompatible with AIX\n"
+	exit 0
+}
 
 #
 # Delete left-over program and rebuild it
diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91
old mode 100644
new mode 100755
index cc33edd43..a8f375a6a
--- a/testsuite/expect/test1.91
+++ b/testsuite/expect/test1.91
@@ -8,7 +8,7 @@
 #          "FAILURE: ..." otherwise with an explanation of the failure, OR
 #          anything else indicates a failure mode that must be investigated.
 ############################################################################
-# Copyright (C) 2005 The Regents of the University of California.
+# Copyright (C) 2005-2007 The Regents of the University of California.
 # Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 # Written by Morris Jette <jette1@llnl.gov>
 # UCRL-CODE-226842.
@@ -101,7 +101,7 @@ exec $bin_chmod 700 $file_prog
 #
 global env
 set env(SLURM_CPU_BIND) "verbose"
-set srun_pid [spawn $srun --allocate -N1 --exclusive --verbose -t2]
+set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash]
 
 #############################################################################
 #
@@ -123,9 +123,9 @@ expect {
 		exp_continue
 	}
 	timeout {
-		send_user "\nFAILURE: srun (from --allocate) not responding "
+		send_user "\nFAILURE: salloc not responding "
 		send_user "or failure to recognize prompt\n"
-		slow_kill $srun_pid
+		slow_kill $salloc_pid
 		exit 1
 	}
 	-re $prompt
@@ -241,7 +241,7 @@ while {$this_cnt <= $num_cores} {
 			exp_continue
 		}
 		timeout {
-			send_user "\nFAILURE: srun (from --allocate) not responding "
+			send_user "\nFAILURE: salloc not responding "
 			send_user "or failure to recognize prompt\n"
 			set exit_code 1
 		}
@@ -291,7 +291,7 @@ while {$this_cnt <= $num_threads} {
 			exp_continue
 		}
 		timeout {
-			send_user "\nFAILURE: srun (from --allocate) not responding "
+			send_user "\nFAILURE: salloc not responding "
 			send_user "or failure to recognize prompt\n"
 			set exit_code 1
 		}
@@ -340,7 +340,7 @@ while {$this_cnt <= $task_cnt} {
 			exp_continue
 		}
 		timeout {
-			send_user "\nFAILURE: srun (from --allocate) not responding "
+			send_user "\nFAILURE: salloc not responding "
 			send_user "or failure to recognize prompt\n"
 			set exit_code 1
 		}
@@ -376,7 +376,7 @@ expect {
 		exp_continue
 	}
 	timeout {
-		send_user "\nFAILURE: srun (from --allocate) not responding "
+		send_user "\nFAILURE: salloc not responding "
 		send_user "or failure to recognize prompt\n"
 		set exit_code 1
 	}
@@ -398,9 +398,9 @@ expect {
 		set exit_code 1
 	}
 	timeout {
-		send_user "\nFAILURE: srun (from --allocate) not responding "
+		send_user "\nFAILURE: salloc not responding "
 		send_user "or failure to recognize prompt\n"
-		slow_kill $srun_pid
+		slow_kill $salloc_pid
 		set exit_code 1
 	}
 	eof {
diff --git a/testsuite/expect/test1.92 b/testsuite/expect/test1.92
old mode 100644
new mode 100755
index c742424d3..1abef4fa8
--- a/testsuite/expect/test1.92
+++ b/testsuite/expect/test1.92
@@ -51,7 +51,7 @@ exit 0
 #
 # Create an allocation
 #
-set srun_pid [spawn $srun --allocate -N2 --verbose -t2]
+set salloc_pid [spawn $salloc -N2 -n4 --verbose -t2 $bin_bash]
 expect {
 	-re "More ($alpha) requested than permitted" {
 		send_user "\nWARNING: can't test srun task distribution\n"
@@ -61,8 +61,8 @@ expect {
 		send "$srun -l -c1 $file_bash | sort -n\n"
 	}
 	timeout {
-		send_user "\nFAILURE: srun not responding\n"
-		slow_kill $srun_pid
+		send_user "\nFAILURE: salloc not responding\n"
+		slow_kill $salloc_pid
 		exit 1
 	}
 }
@@ -92,9 +92,9 @@ expect {
 		exp_continue
 	}
 	timeout {
-		send_user "\nFAILURE: srun (from --allocate) not responding "
+		send_user "\nFAILURE: salloc not responding "
 		send_user "or failure to recognize prompt\n"
-		slow_kill $srun_pid
+		slow_kill $salloc_pid
 		exit 1
 	}
 	-re $prompt
@@ -269,9 +269,9 @@ expect {
 		set exit_code 1
 	}
 	timeout {
-		send_user "\nFAILURE: srun (from --allocate) not responding "
+		send_user "\nFAILURE: salloc not responding "
 		send_user "or failure to recognize prompt\n"
-		slow_kill $srun_pid
+		slow_kill $salloc_pid
 		set exit_code 1
 	}
 	eof {
diff --git a/testsuite/expect/test11.5 b/testsuite/expect/test11.5
index 7d3d32ab1..12b1b2581 100755
--- a/testsuite/expect/test11.5
+++ b/testsuite/expect/test11.5
@@ -159,6 +159,13 @@ if {$matches != 1} {
 	set exit_code 1
 }
 
+# Actual checkpoint on AIX only works for tasks launched using POE
+if {[test_aix] == 1} {
+	send_user "WARNING: Further testing is incompatible with AIX\n"
+	cancel_job $job_id
+	exit $exit_code
+}
+
 #
 # Create a checkpoint, continue execution
 #
diff --git a/testsuite/expect/test14.7 b/testsuite/expect/test14.7
index e2b617494..a296a63e4 100755
--- a/testsuite/expect/test14.7
+++ b/testsuite/expect/test14.7
@@ -130,6 +130,11 @@ if {[wait_for_file $file_err] == 0} {
 			incr matches
 			exp_continue
 		}
+		-re "not found" {
+			send_user "These errors are expected, no worries\n"
+			incr matches
+			exp_continue
+		}
 		eof {
 			wait
 		}
diff --git a/testsuite/expect/test15.24 b/testsuite/expect/test15.24
new file mode 100755
index 000000000..4f64f8509
--- /dev/null
+++ b/testsuite/expect/test15.24
@@ -0,0 +1,97 @@
+#!/usr/bin/expect
+############################################################################
+# Purpose: Test of SLURM functionality
+#          Test of --overcommit option.
+#
+# Output:  "TEST: #.#" followed by "SUCCESS" if test was successful, OR
+#          "FAILURE: ..." otherwise with an explanation of the failure, OR
+#          anything else indicates a failure mode that must be investigated.
+############################################################################
+# Copyright (C) 2002-2007 The Regents of the University of California.
+# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+# Written by Morris Jette <jette1@llnl.gov>
+# UCRL-CODE-226842.
+# 
+# This file is part of SLURM, a resource management program.
+# For details, see <http://www.llnl.gov/linux/slurm/>.
+#  
+# SLURM is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+# 
+# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+# details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with SLURM; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
+############################################################################
+source ./globals
+
+set test_id     "15.24"
+set exit_code   0
+set file_in     "test$test_id.input"
+set file_out    "test$test_id.output"
+set job_id      0
+set task_cnt    10
+
+print_header $test_id
+
+#
+# Submit a slurm job that will print slurm env vars and execute 'id'
+#
+file delete $file_in $file_out
+make_bash_script $file_in "
+  env | grep SLURM | sort
+  $srun $bin_id"
+
+#
+# Submit a slurm job that will execute 'id' on 1 node and over task_cnt tasks
+#
+set timeout     $max_job_delay
+set matches     0
+set tasks       0
+spawn $salloc --tasks=$task_cnt --overcommit -N1 -t1 $file_in
+expect {
+	-re "Granted job allocation ($number)" {
+		set job_id $expect_out(1,string)
+		exp_continue
+	}
+	-re "SLURM_NPROCS=$task_cnt" {
+		incr matches
+		exp_continue
+	}
+	-re "SLURM_OVERCOMMIT=1" {
+		incr matches
+		exp_continue
+	}
+	-re "uid=" {
+		incr tasks
+		exp_continue
+	}
+	timeout {
+		send_user "\nFAILURE: sbatch not responding\n"
+		set exit_code 1
+	}
+	eof {
+		wait
+	}
+}
+if {$matches != 2} {
+	send_user "\nFAILURE: Did not set desired allocation env vars\n"
+	set exit_code 1
+}
+if {$task_cnt != $tasks} {
+	send_user "\nFAILURE: Did not get proper number of tasks: "
+	send_user "$task_cnt, $tasks\n"
+	set exit_code 1
+}
+
+if {$exit_code == 0} {
+	file delete $file_in $file_out
+	send_user "\nSUCCESS\n"
+}
+exit $exit_code
diff --git a/testsuite/expect/test17.32 b/testsuite/expect/test17.32
index 032c1dba2..b26af6949 100755
--- a/testsuite/expect/test17.32
+++ b/testsuite/expect/test17.32
@@ -41,7 +41,7 @@ set task_cnt    10
 print_header $test_id
 
 #
-# Submit a slurm job that will execute 'id' on 4 tasks (or try anyway)
+# Submit a slurm job that will print slurm env vars and execute 'id'
 #
 file delete $file_in $file_out
 make_bash_script $file_in "
@@ -110,11 +110,6 @@ if {$task_cnt != $tasks} {
 	set exit_code 1
 }
 
-if {$task_cnt != $tasks} {
-	send_user "\nFAILURE: Did not get proper number of tasks: "
-	send_user "$task_cnt, $tasks\n"
-	set exit_code 1
-}
 if {$exit_code == 0} {
 	file delete $file_in $file_out
 	send_user "\nSUCCESS\n"
diff --git a/testsuite/expect/test18.17 b/testsuite/expect/test18.17
index fc7388572..7ecf97965 100755
--- a/testsuite/expect/test18.17
+++ b/testsuite/expect/test18.17
@@ -76,7 +76,7 @@ set spawn_id $slaunch_spawn_id
 set sum 0
 expect {
 	-re "exit code ($number)" {
-		send_user "This error is expected, no worries\n"
+		send_user "\nThis error is expected, no worries\n"
 		incr sum $expect_out(1,string)
 		exp_continue
 	}
diff --git a/testsuite/expect/test7.5 b/testsuite/expect/test7.5
index 1bb6ff5f9..44a87f73c 100755
--- a/testsuite/expect/test7.5
+++ b/testsuite/expect/test7.5
@@ -50,14 +50,6 @@ set test_prog     "test$test_id.prog"
 
 print_header $test_id
 
-#
-# Put desired SLURM install directory at head of search path for bulk launch 
-# command to work (runs "slaunch" without path)
-#
-global env
-set env(PATH) "$slurm_dir/bin:$env(PATH)"
-send_user "\n $env(PATH)\n"
-
 #
 # Test for existence of mpi compiler and totalview
 #
@@ -81,6 +73,18 @@ if {[test_front_end] != 0} {
         send_user "\nWARNING: This test is incompatable with front-end systems\n"
         exit 0
 }
+if {[test_aix] == 1} {
+	send_user "WARNING: Test is incompatible with AIX\n"
+	exit 0
+}
+
+#
+# Put desired SLURM install directory at head of search path for bulk launch
+# command to work (runs "slaunch" without path)
+#
+global env
+set env(PATH) "$slurm_dir/bin:$env(PATH)"
+send_user "\n $env(PATH)\n"
 
 #
 # Delete left-over program and rebuild it
diff --git a/testsuite/expect/test7.6 b/testsuite/expect/test7.6
index c007e0c07..84eb5b7af 100755
--- a/testsuite/expect/test7.6
+++ b/testsuite/expect/test7.6
@@ -54,13 +54,6 @@ set no_bulk       "set issue_dgo false; dset TV::bulk_launch_enabled false"
 
 print_header $test_id
 
-#
-# Put desired SLURM install directory at head of search path for bulk launch 
-# command to work (runs "srun" without path)
-#
-set env(PATH) "$slurm_dir/bin:$env(PATH)"
-send_user "\n $env(PATH)\n"
-
 #
 # Test for existence of mpi compiler and totalview
 #
@@ -84,6 +77,17 @@ if {[test_front_end] != 0} {
         send_user "\nWARNING: This test is incompatable with front-end systems\n"
         exit 0
 }
+if {[test_aix] == 1} {
+	send_user "WARNING: Test is incompatible with AIX\n"
+	exit 0
+}
+
+#
+# Put desired SLURM install directory at head of search path for bulk launch
+# command to work (runs "srun" without path)
+#
+set env(PATH) "$slurm_dir/bin:$env(PATH)"
+send_user "\n $env(PATH)\n"
 
 #
 # Delete left-over program and rebuild it
-- 
GitLab