diff --git a/META b/META index 7061ad1f76192f37cdfffd26364b72c61b2e928f..60cb6677127dda29c05435513c9d5d17fce65178 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 22 + Micro: 23 Minor: 2 Name: slurm Release: 1 Release_tags: - Version: 1.2.22 + Version: 1.2.23 diff --git a/NEWS b/NEWS index 212ceda0a22478fbf8768e691b9c8be50abaf865..2cb6a555b8246a25bfa11a631f98d7614020286a 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,31 @@ documents those changes that are of interest to users and admins. * Changes in SLURM 1.2.23 ========================= + -- Fix for libpmi to not export unneeded variables like xstr* + -- BLUEGENE - added per partition dynamic block creation + -- fix infinite loop bug in sview when there were multiple partitions + -- Send message to srun command when a job is requeued due to node failure. + Note this will be overwritten in the output file unless JobFileAppend + is set in slurm.conf. In slurm version 1.3, srun's --open-mode=append + option will offer this control for each job. + -- Change a node's default TmpDisk from 1MB to 0MB and change job's default + disk space requirement from 1MB to 0MB. + -- In sched/wiki (Maui scheduler) specify a QOS (quality of service) by + specifying an account of the form "qos-name". + -- In select/linear, fix bug in scheduling required nodes that already have + a job running on them (req.load.patch from Chris Holmes, HP). + -- For use with Moab only: change timeout for srun/sbatch --get-user-env + option to 2 secs, don't get DISPLAY environment variables, but explicitly + set ENVIRONMENT=BATCH and HOSTNAME to the execution host of the batch script. + -- Add configuration parameter GetEnvTimeout for use with Moab. See + "man slurm.conf" for details. + -- Modify salloc and sbatch to accept both "--tasks" and "--ntasks" as + equivalent options for compatibility with srun. + -- If a partition's node list contains space separators, replace them with + commas for easier parsing. + -- BLUEGENE - fixed bug in geometry specs when creating a block. + -- Add support for Moab and Maui to start jobs with select/cons_res plugin + and jobs requiring more than one CPU per task. * Changes in SLURM 1.2.22 ========================= @@ -2790,4 +2815,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. -$Id: NEWS 13118 2008-01-29 19:09:00Z da $ +$Id: NEWS 13293 2008-02-15 21:51:16Z jette $ diff --git a/doc/html/configurator.html.in b/doc/html/configurator.html.in index 8b0faed7b7031b37a72a44c8b6741dc6b7f602e2..b7d09d968884ab511373b02607b77191ca33902b 100644 --- a/doc/html/configurator.html.in +++ b/doc/html/configurator.html.in @@ -159,6 +159,7 @@ function displayfile() "InactiveLimit=" + document.config.inactive_limit.value + "<br>" + "MinJobAge=" + document.config.min_job_age.value + "<br>" + "KillWait=" + document.config.kill_wait.value + "<br>" + + "#GetEnvTimeout=2 <br>" + "#UnkillableStepProgram= <br>" + "#UnkillableStepTimeout=60 <br>" + "Waittime=" + document.config.wait_time.value + "<br>" + diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 15bf0201312f86b53a49cc6e9d8dab45908e10f9..e58121e2c43061c1761865692133789bb830f0ab 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,4 +1,4 @@ -\." $Id: srun.1 13066 2008-01-23 16:34:55Z jette $ +\." $Id: srun.1 13256 2008-02-12 22:02:36Z jette $ .\" .TH SRUN "1" "July 2007" "srun 1.2" "slurm components" @@ -335,6 +335,7 @@ use only one core in each physical CPU .B [no]multithread [don't] use extra threads with in-core multi-threading which can benefit communication intensive applications +.TP .B help show this help message .RE diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index b330630f1e8b05ef246a9f57e3141eac361d2f9b..43a9508727d550b35efeabebf834f5e1a6bda995 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -145,6 +145,14 @@ specific requested value. Job id values generated will incremented by 1 for each subsequent job. This may be used to provide a meta\-scheduler with a job id space which is disjoint from the interactive jobs. The default value is 1. + +.TP +\fBGetEnvTimeout\fR +Used for Moab scheduled jobs only. Controls how long job should wait +in seconds for loading the user's environment before attempting to +load it from a cache file. Applies when the srun or sbatch +\fI--get-user-env\fR option is used. Default value is 2 seconds. + .TP \fBHeartbeatInterval\fR Defunct paramter. @@ -950,7 +958,7 @@ system is purged as needed so that user jobs have access to most of this space. The Prolog and/or Epilog programs (specified in the configuration file) might be used to insure the file system is kept clean. -The default value is 1. +The default value is 0. .TP \fBWeight\fR The priority of the node for scheduling purposes. diff --git a/slurm.spec b/slurm.spec index de1d4ded7900b27aa0c0872f0c930fdfcc8aad41..5f7e05c94087d9b53dfa38ce63391e11052fcdae 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,4 +1,4 @@ -# $Id: slurm.spec 13075 2008-01-23 20:39:30Z da $ +# $Id: slurm.spec 13266 2008-02-13 21:54:50Z da $ # # Note that this package is not relocatable @@ -60,14 +60,14 @@ %endif Name: slurm -Version: 1.2.22 +Version: 1.2.23 Release: 1 Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.2.22.tar.bz2 +Source: slurm-1.2.23.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ BuildRequires: openssl-devel >= 0.9.6 openssl >= 0.9.6 @@ -211,7 +211,7 @@ SLURM process tracking plugin for SGI job containers. ############################################################################# %prep -%setup -n slurm-1.2.22 +%setup -n slurm-1.2.23 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ @@ -219,7 +219,7 @@ SLURM process tracking plugin for SGI job containers. %{?with_proctrack} \ %{?with_ssl} \ %{?with_munge} \ - %{!?with_readline:--without-readline} \ + %{!?slurm_with_readline:--without-readline} \ %{?with_cflags} make %{?_smp_mflags} diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index fa7596d071c40e00d24f600e7628c1a01ed767cd..7f76820d69042b95ded602167ded5015018c424f 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -868,6 +868,7 @@ typedef struct slurm_ctl_conf { char *job_comp_type; /* job completion logger type */ char *job_comp_loc; /* job completion logging location */ uint16_t job_file_append; /* if set, append to stdout/err file */ + uint16_t get_env_timeout; /* secs allowed for srun --get-user-env */ uint16_t kill_wait; /* seconds between SIGXCPU to SIGKILL * on job termination */ char *mail_prog; /* pathname of mail program */ diff --git a/src/api/Makefile.am b/src/api/Makefile.am index 6aeddd8699f614fef3f3dcb098dc456125b7b501..819f5bd2c47d09355e9cfd077668e35f0755f651 100644 --- a/src/api/Makefile.am +++ b/src/api/Makefile.am @@ -10,6 +10,10 @@ VERSION_SCRIPT = \ version.map OTHER_FLAGS = \ -Wl,--version-script=$(VERSION_SCRIPT) +PMI_VERSION_SCRIPT = \ + pmi_version.map +PMI_OTHER_FLAGS = \ + -Wl,--version-script=$(PMI_VERSION_SCRIPT) endif # libslurm version information : @@ -46,7 +50,7 @@ lib_LTLIBRARIES = libslurm.la libpmi.la # need to be built before anything else. For instance, this takes care # of libpmi.la's dependency on libslurm.la, as long as you use "make" # and NOT "make libpmi.la". -BUILT_SOURCES = $(VERSION_SCRIPT) libslurm.la +BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la # Note that libslurmhelper is mostly the same as libslurm, except that # it exports ALL symbols, including those from libcommon, libeio, etc. @@ -101,8 +105,9 @@ libslurm_la_LDFLAGS = \ $(OTHER_FLAGS) libpmi_la_SOURCES = pmi.c -libpmi_la_LIBADD = $(convenience_libs) -libpmi_la_LDFLAGS = $(LIB_LDFLAGS) +#libpmi_la_LIBADD = $(convenience_libs) +libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \ + $(PMI_OTHER_FLAGS) force: $(convenience_libs) : force @@ -119,8 +124,14 @@ $(VERSION_SCRIPT) : echo " local: *;"; \ echo "};") > $(VERSION_SCRIPT) +$(PMI_VERSION_SCRIPT) : + (echo "{ global:"; \ + echo " PMI_*;"; \ + echo " local: *;"; \ + echo "};") > $(PMI_VERSION_SCRIPT) + CLEANFILES = \ - $(VERSION_SCRIPT) + $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) DISTCLEANFILES = \ - $(VERSION_SCRIPT) + $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) diff --git a/src/api/Makefile.in b/src/api/Makefile.in index 4e9f9cfe6b75c957a9f2e949d16feeb8addaf491..2d94ff509015fd52153f80e14bce785c5f5305e6 100644 --- a/src/api/Makefile.in +++ b/src/api/Makefile.in @@ -74,7 +74,7 @@ am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; am__installdirs = "$(DESTDIR)$(libdir)" libLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(lib_LTLIBRARIES) $(noinst_LTLIBRARIES) -libpmi_la_DEPENDENCIES = $(convenience_libs) +libpmi_la_LIBADD = am_libpmi_la_OBJECTS = pmi.lo libpmi_la_OBJECTS = $(am_libpmi_la_OBJECTS) libpmi_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ @@ -280,6 +280,12 @@ INCLUDES = -I$(top_srcdir) @WITH_GNU_LD_TRUE@OTHER_FLAGS = \ @WITH_GNU_LD_TRUE@ -Wl,--version-script=$(VERSION_SCRIPT) +@WITH_GNU_LD_TRUE@PMI_VERSION_SCRIPT = \ +@WITH_GNU_LD_TRUE@ pmi_version.map + +@WITH_GNU_LD_TRUE@PMI_OTHER_FLAGS = \ +@WITH_GNU_LD_TRUE@ -Wl,--version-script=$(PMI_VERSION_SCRIPT) + # libslurm version information : # @@ -314,7 +320,7 @@ lib_LTLIBRARIES = libslurm.la libpmi.la # need to be built before anything else. For instance, this takes care # of libpmi.la's dependency on libslurm.la, as long as you use "make" # and NOT "make libpmi.la". -BUILT_SOURCES = $(VERSION_SCRIPT) libslurm.la +BUILT_SOURCES = $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) libslurm.la # Note that libslurmhelper is mostly the same as libslurm, except that # it exports ALL symbols, including those from libcommon, libeio, etc. @@ -366,13 +372,15 @@ libslurm_la_LDFLAGS = \ $(OTHER_FLAGS) libpmi_la_SOURCES = pmi.c -libpmi_la_LIBADD = $(convenience_libs) -libpmi_la_LDFLAGS = $(LIB_LDFLAGS) +#libpmi_la_LIBADD = $(convenience_libs) +libpmi_la_LDFLAGS = $(LIB_LDFLAGS) \ + $(PMI_OTHER_FLAGS) + CLEANFILES = \ - $(VERSION_SCRIPT) + $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) DISTCLEANFILES = \ - $(VERSION_SCRIPT) + $(VERSION_SCRIPT) $(PMI_VERSION_SCRIPT) all: $(BUILT_SOURCES) $(MAKE) $(AM_MAKEFLAGS) all-am @@ -707,6 +715,12 @@ $(VERSION_SCRIPT) : echo " client_io_handler_*;"; \ echo " local: *;"; \ echo "};") > $(VERSION_SCRIPT) + +$(PMI_VERSION_SCRIPT) : + (echo "{ global:"; \ + echo " PMI_*;"; \ + echo " local: *;"; \ + echo "};") > $(PMI_VERSION_SCRIPT) # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. .NOEXPORT: diff --git a/src/common/env.c b/src/common/env.c index 741152a0c14d7d976536e588c0247a7144c241a9..424faa2773bab5d791412e1752ce9e6b8b92f4ea 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/env.c - add an environment variable to environment vector - * $Id: env.c 12970 2008-01-07 20:16:53Z jette $ + * $Id: env.c 13237 2008-02-08 23:16:16Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -53,6 +53,7 @@ #include "slurm/slurm.h" #include "src/common/log.h" #include "src/common/env.h" +#include "src/common/read_config.h" #include "src/common/xassert.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" @@ -75,8 +76,6 @@ strong_alias(env_array_append_fmt, slurm_env_array_append_fmt); strong_alias(env_array_overwrite, slurm_env_array_overwrite); strong_alias(env_array_overwrite_fmt, slurm_env_array_overwrite_fmt); -#define SU_WAIT_MSEC 8000 /* 8000 msec for /bin/su to return user - * env vars for --get-user-env option */ #define ENV_BUFSIZE (64 * 1024) /* @@ -132,6 +131,18 @@ _extend_env(char ***envp) return (++ep); } +/* return true if the environment variables should not be set for + * srun's --get-user-env option */ +static bool _discard_env(char *name, char *value) +{ + if ((strcmp(name, "DISPLAY") == 0) || + (strcmp(name, "ENVIRONMENT") == 0) || + (strcmp(name, "HOSTNAME") == 0)) + return true; + + return false; +} + /* * Return the number of elements in the environment `env' */ @@ -787,6 +798,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE * ENVIRONMENT=BATCH + * HOSTNAME * LOADLBATCH (AIX only) * * Sets OBSOLETE variables: @@ -796,8 +808,9 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc) * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node * ? probably only needed for users... */ -void -env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch) +extern void +env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, + const char *node_name) { char *tmp; uint32_t num_nodes = 0; @@ -817,6 +830,8 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch) batch->cpu_count_reps); env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH"); + if (node_name) + env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name); #ifdef HAVE_AIX /* this puts the "poe" command into batch mode */ env_array_overwrite(dest, "LOADLBATCH", "yes"); @@ -1254,9 +1269,10 @@ char **_load_env_cache(const char *username) if (!fgets(line, ENV_BUFSIZE, fp)) break; _strip_cr_nl(line); - _env_array_entry_splitter(line, name, ENV_BUFSIZE, value, - ENV_BUFSIZE); - env_array_overwrite(&env, name, value); + if (_env_array_entry_splitter(line, name, ENV_BUFSIZE, value, + ENV_BUFSIZE) && + (!_discard_env(name, value))) + env_array_overwrite(&env, name, value); } fclose(fp); return env; @@ -1271,7 +1287,7 @@ char **_load_env_cache(const char *username) * 2. Load the user environment from a cache file. This is used * in the event that option 1 times out. * - * timeout value is in seconds or zero for default (8 secs) + * timeout value is in seconds or zero for default (2 secs) * mode is 1 for short ("su <user>"), 2 for long ("su - <user>") * On error, returns NULL. * @@ -1293,7 +1309,7 @@ char **env_array_user_default(const char *username, int timeout, int mode) struct pollfd ufds; if (geteuid() != (uid_t)0) { - info("WARNING: you must be root to use --get-user-env"); + fatal("WARNING: you must be root to use --get-user-env"); return NULL; } @@ -1340,15 +1356,14 @@ char **env_array_user_default(const char *username, int timeout, int mode) ufds.events = POLLIN; /* Read all of the output from /bin/su into buffer */ + if ((timeout == 0) && ((timeout = slurm_get_env_timeout()) == 0)) + timeleft = DEFAULT_GET_ENV_TIMEOUT; found = 0; buf_read = 0; bzero(buffer, sizeof(buffer)); while (1) { gettimeofday(&now, NULL); - if (timeout) - timeleft = timeout * 1000; - else - timeleft = SU_WAIT_MSEC; + timeleft = timeout * 1000; timeleft -= (now.tv_sec - begin.tv_sec) * 1000; timeleft -= (now.tv_usec - begin.tv_usec) / 1000; if (timeleft <= 0) { @@ -1394,7 +1409,7 @@ char **env_array_user_default(const char *username, int timeout, int mode) close(fildes[0]); if (!found) { error("Failed to load current user environment variables"); - _load_env_cache(username); + return _load_env_cache(username); } /* First look for the start token in the output */ @@ -1424,15 +1439,16 @@ char **env_array_user_default(const char *username, int timeout, int mode) break; } if (_env_array_entry_splitter(line, name, sizeof(name), - value, sizeof(value))) + value, sizeof(value)) && + (!_discard_env(name, value))) env_array_overwrite(&env, name, value); line = strtok_r(NULL, "\n", &last); } if (!found) { error("Failed to get all user environment variables"); + env_array_free(env); return _load_env_cache(username); } return env; } - diff --git a/src/common/env.h b/src/common/env.h index 94000ed24f202354845e431ba3c42b5d84fa4925..3f8901d66565847d0ecda3680c5f93be959fe611 100644 --- a/src/common/env.h +++ b/src/common/env.h @@ -116,6 +116,7 @@ void env_array_for_job(char ***dest, * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE * ENVIRONMENT=BATCH + * HOSTNAME * LOADLBATCH (AIX only) * * Sets OBSOLETE variables: @@ -125,7 +126,9 @@ void env_array_for_job(char ***dest, * SLURM_TASKS_PER_NODE <- poorly named, really CPUs per node * ? probably only needed for users... */ -void env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch); +extern void env_array_for_batch_job(char ***dest, + const batch_job_launch_msg_t *batch, + const char* node_name); /* * Set in "dest the environment variables relevant to a SLURM job step, diff --git a/src/common/hostlist.c b/src/common/hostlist.c index 36b489af1844a10cf32b1cd9987a308d8a555c11..19f340942d715b26125b95caa7cb57b5c5df01f0 100644 --- a/src/common/hostlist.c +++ b/src/common/hostlist.c @@ -1,5 +1,5 @@ /*****************************************************************************\ - * $Id: hostlist.c 12632 2007-11-06 23:27:07Z da $ + * $Id: hostlist.c 13270 2008-02-14 19:40:44Z da $ ***************************************************************************** * $LSDId: hostlist.c,v 1.14 2003/10/14 20:11:54 grondo Exp $ ***************************************************************************** @@ -1558,7 +1558,8 @@ error: * RET 1 if str contained a valid number or range, * 0 if conversion of str to a range failed. */ -static int _parse_box_range(char *str, struct _range *ranges, int len, int *count) +static int _parse_box_range(char *str, struct _range *ranges, + int len, int *count) { int a[3], b[3], i1, i2, i; char new_str[8]; @@ -2411,14 +2412,28 @@ _get_boxes(char *buf, int max_len) start_box = i; end_box = i; } + + if (((len+8) < max_len) && (start_box != -1) && ((is_box == 0) || (i == axis_max_x))) { - sprintf(buf+len,"%c%c%cx%c%c%c,", - alpha_num[start_box], alpha_num[axis_min_y], - alpha_num[axis_min_z], - alpha_num[end_box], alpha_num[axis_max_y], - alpha_num[axis_max_z]); - len += 8; + if(start_box == end_box + && axis_min_y == axis_max_y + && axis_min_z == axis_max_z) { + sprintf(buf+len,"%c%c%c,", + alpha_num[start_box], + alpha_num[axis_min_y], + alpha_num[axis_min_z]); + len += 4; + } else { + sprintf(buf+len,"%c%c%cx%c%c%c,", + alpha_num[start_box], + alpha_num[axis_min_y], + alpha_num[axis_min_z], + alpha_num[end_box], + alpha_num[axis_max_y], + alpha_num[axis_max_z]); + len += 8; + } start_box = -1; end_box = -1; } diff --git a/src/common/node_select.c b/src/common/node_select.c index 2f9a57d0cb56856cd0d57f96ce6d211bb85fb2d5..ede1c38fcfb1b80684b1afd52ed25c7b35e88c4f 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -9,7 +9,7 @@ * the plugin. This is because functions required by the plugin can not be * resolved on the front-end nodes, so we can't load the plugins there. * - * $Id: node_select.c 12627 2007-11-06 19:48:55Z jette $ + * $Id: node_select.c 13270 2008-02-14 19:40:44Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1114,16 +1114,20 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, sprintf(start_char, "None"); else { snprintf(start_char, sizeof(start_char), - "%1ux%1ux%1u", jobinfo->start[0], - jobinfo->start[1], jobinfo->start[2]); + "%cx%cx%c", + alpha_num[jobinfo->start[0]], + alpha_num[jobinfo->start[1]], + alpha_num[jobinfo->start[2]]); } snprintf(buf, size, - "%7.7s %6.6s %6.6s %9s %1ux%1ux%1u %5s %-16s", + "%7.7s %6.6s %6.6s %9s %cx%cx%c %5s %-16s", _job_conn_type_string(jobinfo->conn_type), _yes_no_string(jobinfo->reboot), _yes_no_string(jobinfo->rotate), max_procs_char, - geometry[0], geometry[1], geometry[2], + alpha_num[geometry[0]], + alpha_num[geometry[1]], + alpha_num[geometry[2]], start_char, jobinfo->bg_block_id); break; case SELECT_PRINT_MIXED: @@ -1137,18 +1141,22 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, sprintf(start_char, "None"); else { snprintf(start_char, sizeof(start_char), - "%1ux%1ux%1u", jobinfo->start[0], - jobinfo->start[1], jobinfo->start[2]); + "%cx%cx%c", + alpha_num[jobinfo->start[0]], + alpha_num[jobinfo->start[1]], + alpha_num[jobinfo->start[2]]); } snprintf(buf, size, "Connection=%s Reboot=%s Rotate=%s MaxProcs=%s " - "Geometry=%1ux%1ux%1u Start=%s Block_ID=%s", + "Geometry=%cx%cx%c Start=%s Block_ID=%s", _job_conn_type_string(jobinfo->conn_type), _yes_no_string(jobinfo->reboot), _yes_no_string(jobinfo->rotate), max_procs_char, - geometry[0], geometry[1], geometry[2], + alpha_num[geometry[0]], + alpha_num[geometry[1]], + alpha_num[geometry[2]], start_char, jobinfo->bg_block_id); break; case SELECT_PRINT_BG_ID: @@ -1167,16 +1175,20 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, _yes_no_string(jobinfo->rotate)); break; case SELECT_PRINT_GEOMETRY: - snprintf(buf, size, "%1ux%1ux%1u", - geometry[0], geometry[1], geometry[2]); + snprintf(buf, size, "%cx%cx%c", + alpha_num[geometry[0]], + alpha_num[geometry[1]], + alpha_num[geometry[2]]); break; case SELECT_PRINT_START: if (jobinfo->start[0] == (uint16_t) NO_VAL) sprintf(buf, "None"); else { snprintf(buf, size, - "%1ux%1ux%1u", jobinfo->start[0], - jobinfo->start[1], jobinfo->start[2]); + "%cx%cx%c", + alpha_num[jobinfo->start[0]], + alpha_num[jobinfo->start[1]], + alpha_num[jobinfo->start[2]]); } case SELECT_PRINT_MAX_PROCS: if (jobinfo->max_procs == NO_VAL) diff --git a/src/common/read_config.c b/src/common/read_config.c index 01976c812a94bd3fe7bb6aead329a88118917b1f..79d44156de5ab70d7d04b4efd6bbe7257785bbcd 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -140,6 +140,7 @@ s_p_options_t slurm_conf_options[] = { {"JobCredentialPrivateKey", S_P_STRING}, {"JobCredentialPublicCertificate", S_P_STRING}, {"JobFileAppend", S_P_UINT16}, + {"GetEnvTimeout", S_P_UINT16}, {"KillTree", S_P_UINT16, defunct_option}, {"KillWait", S_P_UINT16}, {"MailProg", S_P_STRING}, @@ -315,7 +316,7 @@ static int parse_nodename(void **dest, slurm_parser_enum_t type, if (!s_p_get_uint32(&n->tmp_disk, "TmpDisk", tbl) && !s_p_get_uint32(&n->tmp_disk, "TmpDisk", dflt)) - n->tmp_disk = 1; + n->tmp_disk = 0; if (!s_p_get_uint32(&n->weight, "Weight", tbl) && !s_p_get_uint32(&n->weight, "Weight", dflt)) @@ -456,6 +457,13 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type, if (!s_p_get_string(&p->nodes, "Nodes", tbl) && !s_p_get_string(&p->nodes, "Nodes", dflt)) p->nodes = NULL; + else { + int i; + for (i=0; p->nodes[i]; i++) { + if (isspace(p->nodes[i])) + p->nodes[i] = ','; + } + } if (!s_p_get_boolean(&p->root_only_flag, "RootOnly", tbl) && !s_p_get_boolean(&p->root_only_flag, "RootOnly", dflt)) @@ -1432,6 +1440,9 @@ validate_and_set_defaults(slurm_ctl_conf_t *conf, s_p_hashtbl_t *hashtbl) if (!s_p_get_uint16(&conf->job_file_append, "JobFileAppend", hashtbl)) conf->job_file_append = 0; + if (!s_p_get_uint16(&conf->get_env_timeout, "GetEnvTimeout", hashtbl)) + conf->get_env_timeout = DEFAULT_GET_ENV_TIMEOUT; + if (!s_p_get_uint16(&conf->kill_wait, "KillWait", hashtbl)) conf->kill_wait = DEFAULT_KILL_WAIT; diff --git a/src/common/read_config.h b/src/common/read_config.h index 6817fe34ab109bf7f2c060ad043991eb9147ce15..70215d19926e7507becde5773613d36c9ddd85a1 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -52,6 +52,7 @@ extern char *default_plugstack; #define DEFAULT_CACHE_GROUPS 0 #define DEFAULT_FAST_SCHEDULE 1 #define DEFAULT_FIRST_JOB_ID 1 +#define DEFAULT_GET_ENV_TIMEOUT 2 /* NOTE: DEFAULT_INACTIVE_LIMIT must be 0 for Blue Gene/L systems */ #define DEFAULT_INACTIVE_LIMIT 0 #define DEFAULT_JOB_ACCT_LOGFILE "/var/log/slurm_accounting.log" diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 550e855148bc9dca7bffd87449c0d74c29a5cd68..d8d87d88cd3167e575df0c5987dcfc1ca8ee8ef9 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -182,6 +182,20 @@ void slurm_api_clear_config(void) /* slurm_mutex_lock(&config_lock); */ /* } */ +/* slurm_get_env_timeout + * return default timeout for srun/sbatch --get-user-env option + */ +int inline slurm_get_env_timeout(void) +{ + int timeout; + slurm_ctl_conf_t *conf; + + conf = slurm_conf_lock(); + timeout = conf->get_env_timeout; + slurm_conf_unlock(); + return timeout; +} + /* slurm_get_mpi_default * get default mpi value from slurmctld_conf object * RET char * - mpi default value from slurm.conf, MUST be xfreed by caller diff --git a/src/common/slurm_protocol_api.h b/src/common/slurm_protocol_api.h index bd5614c56b4f7efd8d8edcc009b3c1710be746fd..10c19e124a8906530dddc99c7861f8a2d560673c 100644 --- a/src/common/slurm_protocol_api.h +++ b/src/common/slurm_protocol_api.h @@ -96,6 +96,11 @@ int inline slurm_set_api_config(slurm_protocol_config_t * protocol_conf); */ inline slurm_protocol_config_t *slurm_get_api_config(); +/* slurm_get_env_timeout + * return default timeout for srun/sbatch --get-user-env option + */ +int inline slurm_get_env_timeout(void); + /* slurm_get_mpi_default * get default mpi value from slurmctld_conf object * RET char * - mpi default value from slurm.conf, MUST be xfreed by caller diff --git a/src/common/slurm_step_layout.c b/src/common/slurm_step_layout.c index 555f7d65fea78b9d956efa60452a18bcb44142aa..e49e01dc443aa7cbc81fb2c34d86fb8f5401043f 100644 --- a/src/common/slurm_step_layout.c +++ b/src/common/slurm_step_layout.c @@ -679,12 +679,12 @@ static int _task_layout_plane(slurm_step_layout_t *step_layout, return SLURM_ERROR; } +#if(0) + /* debugging only */ for (i=0; i < step_layout->node_cnt; i++) { info("tasks[%d]: %u", i, step_layout->tasks[i]); } -#if(0) - /* debugging only */ for (i=0; i < step_layout->node_cnt; i++) { info ("Host %d _plane_ # of tasks %u", i, step_layout->tasks[i]); for (j=0; j<step_layout->tasks[i]; j++) { diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index ac07bedab2c97337be5d166b0b1a3cf2c8124b8a..38f1f5d8ecab0bd3c67861158089aa08f123ca6e 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -320,8 +320,12 @@ static int mvapich_read_n (mvapich_state_t *st, struct mvapich_info *mvi, return (-1); } - if (n == 0) /* unexpected EOF */ + if (n == 0) { /* unexpected EOF */ + error ("mvapich: rank %d: " + "Unexpected EOF (%dB left to read)", + mvi->rank, nleft); return (-1); + } nleft -= n; p += n; @@ -554,52 +558,55 @@ static void mvapich_bcast_hostids (mvapich_state_t *st) } /* Write size bytes from buf into socket for rank */ -static void mvapich_send (mvapich_state_t *st, void* buf, int size, int rank) +static int mvapich_send (mvapich_state_t *st, void* buf, int size, int rank) { struct mvapich_info *mvi = st->mvarray [rank]; - if (mvapich_write_n (st, mvi, buf, size) < 0) - error ("mvapich: write hostid rank %d: %m", mvi->rank); + return (mvapich_write_n (st, mvi, buf, size)); } /* Read size bytes from socket for rank into buf */ -static void mvapich_recv (mvapich_state_t *st, void* buf, int size, int rank) +static int mvapich_recv (mvapich_state_t *st, void* buf, int size, int rank) { struct mvapich_info *mvi = st->mvarray [rank]; - if (mvapich_read_n (st, mvi, buf, size) <= 0) - error("mvapich reading from %d: %m", mvi->rank); -} - -/* Read an integer from socket for rank */ -static int mvapich_recv_int (mvapich_state_t *st, int rank) -{ - int buf; - mvapich_recv(st, &buf, sizeof(buf), rank); - return buf; + return (mvapich_read_n (st, mvi, buf, size)); } /* Scatter data in buf to ranks using chunks of size bytes */ -static void mvapich_scatterbcast (mvapich_state_t *st, void* buf, int size) +static int mvapich_scatterbcast (mvapich_state_t *st, void* buf, int size) { - int i; - for (i = 0; i < st->nprocs; i++) - mvapich_send(st, buf + i*size, size, i); + int i, rc; + int n = 0; + + for (i = 0; i < st->nprocs; i++) { + if ((rc = mvapich_send (st, buf + i*size, size, i)) <= 0) + return (-1); + n += rc; + } + return (n); } /* Broadcast buf to each rank, which is size bytes big */ -static void mvapich_allgatherbcast (mvapich_state_t *st, void* buf, int size) +static int mvapich_allgatherbcast (mvapich_state_t *st, void* buf, int size) { - int i; - for (i = 0; i < st->nprocs; i++) - mvapich_send(st, buf, size, i); + int i, rc; + int n = 0; + + for (i = 0; i < st->nprocs; i++) { + if ((rc = mvapich_send (st, buf, size, i)) <= 0) + return (-1); + n += rc; + } + return (n); } /* Perform alltoall using data in buf with elements of size bytes */ -static void mvapich_alltoallbcast (mvapich_state_t *st, void* buf, int size) +static int mvapich_alltoallbcast (mvapich_state_t *st, void* buf, int size) { int pbufsize = size * st->nprocs; void* pbuf = xmalloc(pbufsize); + int i, src, rc; + int n = 0; - int i, src; for (i = 0; i < st->nprocs; i++) { for (src = 0; src < st->nprocs; src++) { memcpy( pbuf + size*src, @@ -607,22 +614,141 @@ static void mvapich_alltoallbcast (mvapich_state_t *st, void* buf, int size) size ); } - mvapich_send(st, pbuf, pbufsize, i); + if ((rc = mvapich_send (st, pbuf, pbufsize, i)) <= 0) + goto out; + n += rc; } + out: xfree(pbuf); + return (rc < 0 ? rc : n); +} + +static int recv_common_value (mvapich_state_t *st, int *valp, int rank) +{ + int val; + if (mvapich_recv (st, &val, sizeof (int), rank) <= 0) { + error ("mvapich: recv: rank %d: %m\n", rank); + return (-1); + } + + /* + * If value is uninitialized, set it to current value, + * otherwise ensure that current value matches previous + */ + if (*valp == -1) + *valp = val; + else if (val != *valp) { + error ("mvapich: PMGR: unexpected value from rank %d: " + "expected %d, recvd %d", rank, *valp, val); + return (-1); + } + return (0); +} + +/* + * PMGR_BCAST (root, size of message, then message data (from root only)) + */ +static int process_pmgr_bcast (mvapich_state_t *st, int *rootp, int *sizep, + void ** bufp, int rank) +{ + if (recv_common_value (st, rootp, rank) < 0) + return (-1); + if (recv_common_value (st, sizep, rank) < 0) + return (-1); + if (rank != *rootp) + return (0); + + /* + * Recv data from root + */ + *bufp = xmalloc (*sizep); + if (mvapich_recv (st, *bufp, *sizep, rank) < 0) { + error ("mvapich: PMGR_BCAST: Failed to recv from root: %m"); + return (-1); + } + return (0); +} + +/* + * PMGR_GATHER (root, size of message, then message data) + */ +static int process_pmgr_gather (mvapich_state_t *st, int *rootp, + int *sizep, void **bufp, int rank) +{ + if (recv_common_value (st, rootp, rank) < 0) + return (-1); + if (recv_common_value (st, sizep, rank) < 0) + return (-1); + if (*bufp == NULL) + *bufp = xmalloc (*sizep * st->nprocs); + + if (mvapich_recv(st, (*bufp) + (*sizep)*rank, *sizep, rank) < 0) { + error ("mvapich: PMGR_/GATHER: rank %d: recv: %m", rank); + return (-1); + } + return (0); } -/* Check that new == curr value if curr has been initialized */ -static int set_current (int curr, int new) +/* + * PMGR_SCATTER (root, size of message, then message data) + */ +static int process_pmgr_scatter (mvapich_state_t *st, int *rootp, + int *sizep, void **bufp, int rank) { - if (curr == -1) - curr = new; - if (new != curr) { - error("PMGR unexpected value: received %d, expecting %d", - new, curr); + if (recv_common_value (st, rootp, rank) < 0) + return (-1); + if (recv_common_value (st, sizep, rank) < 0) + return (-1); + if (rank != *rootp) + return (0); + + if (*bufp == NULL) + *bufp = xmalloc (*sizep * st->nprocs); + + if (mvapich_recv(st, *bufp, (*sizep) * st->nprocs, rank) < 0) { + error ("mvapich: PMGR_SCATTER: rank %d: recv: %m", rank); + return (-1); } - return curr; + return (0); +} + +/* + * PMGR_ALLGATHER (size of message, then message data) + */ +static int process_pmgr_allgather (mvapich_state_t *st, int *sizep, + void **bufp, int rank) +{ + if (recv_common_value (st, sizep, rank) < 0) + return (-1); + if (*bufp == NULL) + *bufp = xmalloc (*sizep * st->nprocs); + if (mvapich_recv (st, (*bufp) + *sizep*rank, *sizep, rank) < 0) { + error ("mvapich: PMGR_ALLGATHER: rank %d: %m", rank); + return (-1); + } + return (0); +} + +/* + * PMGR_ALLTOALL (size of message, then message data) + */ +static int process_pmgr_alltoall (mvapich_state_t *st, int *sizep, + void **bufp, int rank) +{ + if (recv_common_value (st, sizep, rank) < 0) + return (-1); + + if (*bufp == NULL) + *bufp = xmalloc (*sizep * st->nprocs * st->nprocs); + if (mvapich_recv ( st, + *bufp + (*sizep * st->nprocs)*rank, + *sizep * st->nprocs, rank ) < 0) { + error ("mvapich: PMGR_ALLTOALL: recv: rank %d: %m", rank); + return (-1); + } + + return (0); } /* @@ -643,7 +769,7 @@ static int set_current (int curr, int new) * Note: Although there are op codes available for PMGR_OPEN and * PMGR_ABORT, neither is fully implemented and should not be used. */ -static void mvapich_processops (mvapich_state_t *st) +static int mvapich_processops (mvapich_state_t *st) { /* Until a 'CLOSE' or 'ABORT' message is seen, we continuously * loop processing ops @@ -663,57 +789,57 @@ static void mvapich_processops (mvapich_state_t *st) struct mvapich_info *mvi = st->mvarray [i]; // read in opcode - opcode = set_current(opcode, mvapich_recv_int(st, i)); + if (recv_common_value (st, &opcode, i) < 0) { + error ("mvapich: rank %d: Failed to read opcode: %m", + mvi->rank); + return (-1); + } // read in additional data depending on current opcode int rank, code; switch(opcode) { case 0: // PMGR_OPEN (followed by rank) - rank = mvapich_recv_int(st, i); + if (mvapich_recv (st, &rank, sizeof (int), i) <= 0) { + error ("mvapich: PMGR_OPEN: recv: %m"); + exit = 1; + } break; case 1: // PMGR_CLOSE (no data, close the socket) close(mvi->fd); break; case 2: // PMGR_ABORT (followed by exit code) - code = mvapich_recv_int(st, i); + if (mvapich_recv (st, &code, sizeof (int), i) <= 0) { + error ("mvapich: PMGR_ABORT: recv: %m"); + } error("mvapich abort with code %d from rank %d", code, i); break; case 3: // PMGR_BARRIER (no data) break; - case 4: // PMGR_BCAST (root, size of message, - // then message data (from root only)) - root = set_current(root, mvapich_recv_int(st, i)); - size = set_current(size, mvapich_recv_int(st, i)); - if (!buf) buf = (void*) xmalloc(size); - if (i == root) mvapich_recv(st, buf, size, i); + case 4: // PMGR_BCAST + if (process_pmgr_bcast (st, &root, &size, &buf, i) < 0) + return (-1); break; - case 5: // PMGR_GATHER (root, size of message, - // then message data) - root = set_current(root, mvapich_recv_int(st, i)); - size = set_current(size, mvapich_recv_int(st, i)); - if (!buf) buf = (void*) xmalloc(size * st->nprocs); - mvapich_recv(st, buf + size*i, size, i); + case 5: // PMGR_GATHER + if (process_pmgr_gather (st, &root, &size, &buf, i) < 0) + return (-1); break; - case 6: // PMGR_SCATTER (root, size of message, - // then message data) - root = set_current(root, mvapich_recv_int(st, i)); - size = set_current(size, mvapich_recv_int(st, i)); - if (!buf) buf = (void*) xmalloc(size * st->nprocs); - if (i == root) mvapich_recv(st, buf, size * st->nprocs, i); + case 6: // PMGR_SCATTER + if (process_pmgr_scatter (st, &root, + &size, &buf, i) < 0) + return (-1); break; - case 7: // PMGR_ALLGATHER (size of message, then message data) - size = set_current(size, mvapich_recv_int(st, i)); - if (!buf) buf = (void*) xmalloc(size * st->nprocs); - mvapich_recv(st, buf + size*i, size, i); + case 7: // PMGR_ALLGATHER + if (process_pmgr_allgather (st, &size, &buf, i) < 0) + return (-1); break; - case 8: // PMGR_ALLTOALL (size of message, then message data) - size = set_current(size, mvapich_recv_int(st, i)); - if (!buf) buf = (void*) xmalloc(size * st->nprocs * st->nprocs); - mvapich_recv(st, buf + (size*st->nprocs)*i, size * st->nprocs, i); + case 8: // PMGR_ALLTOALL + if (process_pmgr_alltoall (st, &size, &buf, i) < 0) + return (-1); break; default: error("Unrecognized PMGR opcode: %d", opcode); + return (-1); } } @@ -767,6 +893,7 @@ static void mvapich_processops (mvapich_state_t *st) xfree(buf); } // while(!exit) mvapich_debug ("Completed processing PMGR opcodes"); + return (0); } static void mvapich_bcast (mvapich_state_t *st) @@ -1158,7 +1285,8 @@ again: } if (st->protocol_version == 8) { - mvapich_processops(st); + if (mvapich_processops(st) < 0) + goto fail; } else { mvapich_debug ("bcasting mvapich info to %d tasks", st->nprocs); mvapich_bcast (st); diff --git a/src/plugins/sched/wiki/get_jobs.c b/src/plugins/sched/wiki/get_jobs.c index 70c9d91aa8a9107333ca3259bc82238bbe939edf..6c6fd1f626b198e50007562da7f232e07c224e6b 100644 --- a/src/plugins/sched/wiki/get_jobs.c +++ b/src/plugins/sched/wiki/get_jobs.c @@ -301,8 +301,14 @@ static char * _dump_job(struct job_record *job_ptr, int state_info) } if (job_ptr->account) { - snprintf(tmp, sizeof(tmp), - "ACCOUNT=%s;", job_ptr->account); + /* allow QOS spec in form "qos-name" */ + if (!strncmp(job_ptr->account,"qos-",4)) { + snprintf(tmp, sizeof(tmp), + "QOS=%s;", job_ptr->account + 4); + } else { + snprintf(tmp, sizeof(tmp), + "ACCOUNT=%s;", job_ptr->account); + } xstrcat(buf, tmp); } diff --git a/src/plugins/sched/wiki/start_job.c b/src/plugins/sched/wiki/start_job.c index f7cdd46d189ad8b080c780beb3ac794e7a9ca0ee..180e79742dd7a46e2e30c8830cfc217023da8274 100644 --- a/src/plugins/sched/wiki/start_job.c +++ b/src/plugins/sched/wiki/start_job.c @@ -195,6 +195,7 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, * performs many string compares. */ xfree(job_ptr->details->req_node_layout); if (task_cnt && cr_enabled) { + uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task); job_ptr->details->req_node_layout = (uint16_t *) xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t)); bsize = bit_size(new_bitmap); @@ -212,7 +213,8 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, if ((node_idx[node_name_len] == ',') || (node_idx[node_name_len] == '\0')) { job_ptr->details-> - req_node_layout[ll]++; + req_node_layout[ll] += + cpus_per_task; } node_cur = strchr(node_idx, ','); if (node_cur) diff --git a/src/plugins/sched/wiki2/start_job.c b/src/plugins/sched/wiki2/start_job.c index d6fd7dd8201d8e1f633723ccceb90d5c1e63f800..564919f93ad682e69be342566ead6493d7cfc754 100644 --- a/src/plugins/sched/wiki2/start_job.c +++ b/src/plugins/sched/wiki2/start_job.c @@ -251,6 +251,7 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, * performs many string compares. */ xfree(job_ptr->details->req_node_layout); if (task_cnt && cr_enabled) { + uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task); job_ptr->details->req_node_layout = (uint16_t *) xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t)); bsize = bit_size(new_bitmap); @@ -268,7 +269,8 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, if ((node_idx[node_name_len] == ',') || (node_idx[node_name_len] == '\0')) { job_ptr->details-> - req_node_layout[ll]++; + req_node_layout[ll] += + cpus_per_task; } node_cur = strchr(node_idx, ','); if (node_cur) diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index f8cc7dc0967409d637ea40ee885847846b71556e..9c0467374b800c4b20621eacd58353a18f3bd019 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * block_allocator.c - Assorted functions for layout of bglblocks, * wiring, mapping for smap, etc. - * $Id: block_allocator.c 12543 2007-10-23 22:19:49Z jette $ + * $Id: block_allocator.c 13150 2008-01-31 22:59:13Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1264,6 +1264,7 @@ extern int copy_node_path(List nodes, List dest_nodes) #endif return rc; } + extern int check_and_set_node_list(List nodes) { int rc = SLURM_ERROR; diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.h b/src/plugins/select/bluegene/block_allocator/block_allocator.h index cc69f7d3595b8a25310b0d120a1b88c0c26763dc..91fe587478f55e963a80a597abcc149801ce8259 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.h +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.h @@ -109,6 +109,7 @@ typedef struct { bool rotate; bool elongate; List elongate_geos; + bitstr_t *avail_node_bitmap; /* pointer to available nodes */ } ba_request_t; typedef struct { @@ -172,7 +173,7 @@ typedef struct * ba_node_t: node within the allocation system. */ typedef struct { - /* set if using this node in a block*/ + /* set if using this node in a block */ bool used; /* coordinates */ diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index f03eae075ad2d68af028d2bc7f778946e34fdfe1..30d60327b72102027de95209dd15428506f0c44f 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -2,7 +2,7 @@ * bg_job_place.c - blue gene job placement (e.g. base block selection) * functions. * - * $Id: bg_job_place.c 12627 2007-11-06 19:48:55Z jette $ + * $Id: bg_job_place.c 13271 2008-02-14 20:02:00Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -209,7 +209,6 @@ static int _find_best_block_match(struct job_record* job_ptr, uint32_t req_procs = job_ptr->num_procs; uint32_t proc_cnt; ba_request_t request; - ba_request_t *try_request = NULL; int i; int rot_cnt = 0; int created = 0; @@ -256,50 +255,7 @@ static int _find_best_block_match(struct job_record* job_ptr, if(start[X] != (uint16_t)NO_VAL) start_req = 1; - if(num_unused_cpus != total_cpus) { - /* - see if we have already tried to create this - size but couldn't make it right now no reason - to try again - */ - slurm_mutex_lock(&request_list_mutex); - itr = list_iterator_create(bg_request_list); - while ((try_request = list_next(itr))) { - if(start_req) { - if ((try_request->start[X] != start[X]) - || (try_request->start[Y] != start[Y]) - || (try_request->start[Z] != start[Z])) { - debug4("got %c%c%c looking for %c%c%c", - alpha_num[try_request->start[X]], - alpha_num[try_request->start[Y]], - alpha_num[try_request->start[Z]], - alpha_num[start[X]], - alpha_num[start[Y]], - alpha_num[start[Z]]); - continue; - } - debug3("found %c%c%c looking for %c%c%c", - alpha_num[try_request->start[X]], - alpha_num[try_request->start[Y]], - alpha_num[try_request->start[Z]], - alpha_num[start[X]], - alpha_num[start[Y]], - alpha_num[start[Z]]); - } - if(try_request->procs == req_procs) { - debug("already tried to create but " - "can't right now."); - list_iterator_destroy(itr); - slurm_mutex_unlock(&request_list_mutex); - if(test_only) - return SLURM_SUCCESS; - else - return SLURM_ERROR; - } - } - list_iterator_destroy(itr); - slurm_mutex_unlock(&request_list_mutex); - } + select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_CONN_TYPE, &conn_type); select_g_get_jobinfo(job_ptr->select_jobinfo, @@ -471,8 +427,8 @@ try_again: req_procs, max_procs, proc_cnt); if ((proc_cnt < req_procs) || ((max_procs != NO_VAL) && (proc_cnt > max_procs))) { - /* We use the proccessor count per partition here - mostly to see if we can run on a smaller partition. + /* We use the proccessor count per block here + mostly to see if we can run on a smaller block. */ convert_num_unit((float)proc_cnt, tmp_char, sizeof(tmp_char), UNIT_NONE); @@ -522,7 +478,7 @@ try_again: continue; } - /* Make sure no other partitions are under this partition + /* Make sure no other blocks are under this block are booted and running jobs */ itr2 = list_iterator_create(bg_list); @@ -719,7 +675,12 @@ try_again: request.linuximage = linuximage; request.mloaderimage = mloaderimage; request.ramdiskimage = ramdiskimage; - + if(job_ptr->details->req_node_bitmap) + request.avail_node_bitmap = + job_ptr->details->req_node_bitmap; + else + request.avail_node_bitmap = slurm_block_bitmap; + debug("trying with all free blocks"); if(create_dynamic_block(&request, NULL) == SLURM_ERROR) { error("this job will never run on " @@ -735,24 +696,6 @@ try_again: goto end_it; } - /* - add request to list so we don't try again until - something happens like a job finishing or - something so we can try again - */ - debug3("adding %d %d", - request.procs, request.conn_type); - try_request = xmalloc(sizeof(ba_request_t)); - try_request->procs = req_procs; - try_request->save_name = NULL; - try_request->elongate_geos = NULL; - try_request->start_req = request.start_req; - for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) - try_request->start[i] = start[i]; - slurm_mutex_lock(&request_list_mutex); - list_push(bg_request_list, try_request); - slurm_mutex_unlock(&request_list_mutex); - slurm_conf_lock(); snprintf(tmp_char, sizeof(tmp_char), "%s%s", slurmctld_conf.node_prefix, @@ -812,6 +755,12 @@ try_again: request.linuximage = linuximage; request.mloaderimage = mloaderimage; request.ramdiskimage = ramdiskimage; + if(job_ptr->details->req_node_bitmap) + request.avail_node_bitmap = + job_ptr->details->req_node_bitmap; + else + request.avail_node_bitmap = slurm_block_bitmap; + /* 1- try empty space 2- we see if we can create one in the unused bps diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index f656c0525af4e0d881931a9c0d5a7dd6a2b2b787..5153de5689352b5fa20f5fac44a27691416fe642 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -2,7 +2,7 @@ * bg_job_run.c - blue gene job execution (e.g. initiation and termination) * functions. * - * $Id: bg_job_run.c 11274 2007-03-30 19:39:49Z da $ + * $Id: bg_job_run.c 13271 2008-02-14 20:02:00Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -589,9 +589,6 @@ static void _term_agent(bg_update_t *bg_update_ptr) bg_record->bg_block_id, bg_record->user_name); - if(bluegene_layout_mode == LAYOUT_DYNAMIC) - remove_from_request_list(); - if(job_remove_failed) { char time_str[32]; slurm_make_time_str(&now, time_str, sizeof(time_str)); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index c684d32f53578c3149d8ad49244e927e0f86b4fb..30684bf23bbe289a75e13cf728871205fd3dbff2 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * bluegene.c - blue gene node configuration processing module. * - * $Id: bluegene.c 12450 2007-10-05 18:22:36Z da $ + * $Id: bluegene.c 13271 2008-02-14 20:02:00Z da $ ***************************************************************************** * Copyright (C) 2004 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -59,8 +59,6 @@ List bg_found_block_list = NULL; /* found bg blocks already on system */ List bg_job_block_list = NULL; /* jobs running in these blocks */ List bg_booted_block_list = NULL; /* blocks that are booted */ List bg_freeing_list = NULL; /* blocks that being freed */ -List bg_request_list = NULL; /* list of request that can't - be made just yet */ List bg_blrtsimage_list = NULL; List bg_linuximage_list = NULL; @@ -80,7 +78,6 @@ uint16_t bridge_api_verb = 0; bool agent_fini = false; time_t last_bg_update; pthread_mutex_t block_state_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_mutex_t request_list_mutex = PTHREAD_MUTEX_INITIALIZER; int num_block_to_free = 0; int num_block_freed = 0; int blocks_are_created = 0; @@ -185,13 +182,6 @@ extern void fini_bg(void) while(destroy_cnt > 0) usleep(1000); - slurm_mutex_lock(&request_list_mutex); - if (bg_request_list) { - list_destroy(bg_request_list); - bg_request_list = NULL; - } - slurm_mutex_unlock(&request_list_mutex); - if(bg_blrtsimage_list) { list_destroy(bg_blrtsimage_list); bg_blrtsimage_list = NULL; @@ -407,6 +397,7 @@ extern void process_nodes(bg_record_t *bg_record) start); if(bg_record->nodes[j] != ',') break; + j--; } j++; } @@ -423,6 +414,8 @@ extern void process_nodes(bg_record_t *bg_record) itr = list_iterator_create(bg_record->bg_block_list); while ((ba_node = list_next(itr)) != NULL) { + if(!ba_node->used) + continue; debug4("%c%c%c is included in this block", alpha_num[ba_node->coord[X]], alpha_num[ba_node->coord[Y]], @@ -1205,6 +1198,89 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) debug("No list was given"); } + if(request->avail_node_bitmap) { + int j=0, number; + int x,y,z; + char *nodes = NULL; + bitstr_t *bitmap = bit_alloc(node_record_count); + int start[BA_SYSTEM_DIMENSIONS]; + int end[BA_SYSTEM_DIMENSIONS]; + + /* we want the bps that aren't in this partition to + * mark them as used + */ + bit_or(bitmap, request->avail_node_bitmap); + bit_not(bitmap); + nodes = bitmap2node_name(bitmap); + //info("not using %s", nodes); + while(nodes[j] != '\0') { + if ((nodes[j] == '[' || nodes[j] == ',') + && (nodes[j+8] == ']' || nodes[j+8] == ',') + && (nodes[j+4] == 'x' || nodes[j+4] == '-')) { + j++; + number = xstrntol(nodes + j, + NULL, BA_SYSTEM_DIMENSIONS, + HOSTLIST_BASE); + start[X] = number / + (HOSTLIST_BASE * HOSTLIST_BASE); + start[Y] = (number % + (HOSTLIST_BASE * HOSTLIST_BASE)) + / HOSTLIST_BASE; + start[Z] = (number % HOSTLIST_BASE); + j += 4; + number = xstrntol(nodes + j, + NULL, 3, HOSTLIST_BASE); + end[X] = number / + (HOSTLIST_BASE * HOSTLIST_BASE); + end[Y] = (number + % (HOSTLIST_BASE * HOSTLIST_BASE)) + / HOSTLIST_BASE; + end[Z] = (number % HOSTLIST_BASE); + j += 3; + for (x = start[X]; x <= end[X]; x++) { + for (y = start[Y]; y <= end[Y]; y++) { + for (z = start[Z]; + z <= end[Z]; z++) { + ba_system_ptr-> + grid[x] +#ifdef HAVE_BG + [y][z] +#endif + .used = 1; + } + } + } + + if(nodes[j] != ',') + break; + j--; + } else if((nodes[j] >= '0' && nodes[j] <= '9') + || (nodes[j] >= 'A' && nodes[j] <= 'Z')) { + + number = xstrntol(nodes + j, + NULL, BA_SYSTEM_DIMENSIONS, + HOSTLIST_BASE); + x = number / (HOSTLIST_BASE * HOSTLIST_BASE); + y = (number % (HOSTLIST_BASE * HOSTLIST_BASE)) + / HOSTLIST_BASE; + z = (number % HOSTLIST_BASE); + j+=3; + ba_system_ptr->grid[x] +#ifdef HAVE_BG + [y][z] +#endif + .used = 1; + + if(nodes[j] != ',') + break; + j--; + } + j++; + } + xfree(nodes); + FREE_NULL_BITMAP(bitmap); + } + if(request->size==1 && request->procs < bluegene_bp_node_cnt) { request->conn_type = SELECT_SMALL; if(request->procs == (procs_per_node/16)) { @@ -1228,14 +1304,14 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) } num_quarter=4; } - + if(_breakup_blocks(request, my_block_list) != SLURM_SUCCESS) { debug2("small block not able to be placed"); //rc = SLURM_ERROR; } else goto finished; } - + if(request->conn_type == SELECT_NAV) request->conn_type = SELECT_TORUS; @@ -1531,31 +1607,6 @@ extern int remove_from_bg_list(List my_bg_list, bg_record_t *bg_record) return rc; } -extern int remove_from_request_list() -{ - ba_request_t *try_request = NULL; - ListIterator itr; - int rc = SLURM_ERROR; - - /* - remove all requests out of the list. - */ - - slurm_mutex_lock(&request_list_mutex); - itr = list_iterator_create(bg_request_list); - while ((try_request = list_next(itr)) != NULL) { - debug3("removing size %d", - try_request->procs); - list_remove(itr); - delete_ba_request(try_request); - //list_iterator_reset(itr); - rc = SLURM_SUCCESS; - } - list_iterator_destroy(itr); - slurm_mutex_unlock(&request_list_mutex); - return rc; -} - extern int bg_free_block(bg_record_t *bg_record) { #ifdef HAVE_BG_FILES @@ -1705,8 +1756,6 @@ extern void *mult_destroy_block(void *args) */ sort_bg_record_inc_size(bg_freeing_list); - remove_from_request_list(); - slurm_mutex_lock(&block_state_mutex); if(remove_from_bg_list(bg_job_block_list, bg_record) == SLURM_SUCCESS) { @@ -2361,7 +2410,7 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end) alpha_num[DIM_SIZE[X]], alpha_num[DIM_SIZE[Y]], alpha_num[DIM_SIZE[Z]]); } - debug3("bluegene.conf: %c%c%cx%c%c%c", + debug3("adding bps: %c%c%cx%c%c%c", alpha_num[start[X]], alpha_num[start[Y]], alpha_num[start[Z]], alpha_num[end[X]], alpha_num[end[Y]], alpha_num[end[Z]]); debug3("slurm.conf: %c%c%c", @@ -2380,6 +2429,7 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end) slurm_conf_unlock(); ba_node = ba_copy_node( &ba_system_ptr->grid[x][y][z]); + ba_node->used = 1; list_append(bg_record->bg_block_list, ba_node); node_count++; } @@ -2436,12 +2486,6 @@ static void _set_bg_lists() list_destroy(bg_list); bg_list = list_create(destroy_bg_record); - slurm_mutex_lock(&request_list_mutex); - if(bg_request_list) - list_destroy(bg_request_list); - bg_request_list = list_create(delete_ba_request); - slurm_mutex_unlock(&request_list_mutex); - slurm_mutex_unlock(&block_state_mutex); if(bg_blrtsimage_list) @@ -2865,6 +2909,14 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list) continue; if(bg_record->state != RM_PARTITION_FREE) continue; + if (request->avail_node_bitmap && + !bit_super_set(bg_record->bitmap, + request->avail_node_bitmap)) { + debug2("bg block %s has nodes not usable by this job", + bg_record->bg_block_id); + continue; + } + if(request->start_req) { if ((request->start[X] != bg_record->start[X]) || (request->start[Y] != bg_record->start[Y]) @@ -2889,7 +2941,7 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list) proc_cnt = bg_record->bp_count * bg_record->cpus_per_bp; if(proc_cnt == request->procs) { - debug2("found it here %s, %s", + debug("found it here %s, %s", bg_record->bg_block_id, bg_record->nodes); request->save_name = xmalloc(4); @@ -2961,6 +3013,14 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list) != NULL) { if(bg_record->job_running != NO_JOB_RUNNING) continue; + if (request->avail_node_bitmap && + !bit_super_set(bg_record->bitmap, + request->avail_node_bitmap)) { + debug2("bg block %s has nodes not usable by this job", + bg_record->bg_block_id); + continue; + } + if(request->start_req) { if ((request->start[X] != bg_record->start[X]) || (request->start[Y] != bg_record->start[Y]) diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 48752fc999b5a46c19328d2774e648061c9c459d..085543cd20292be2674b9db38fdee1d087829d0d 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * select_bluegene.c - node selection plugin for Blue Gene system. * - * $Id: select_bluegene.c 12409 2007-09-26 16:32:16Z jette $ + * $Id: select_bluegene.c 13271 2008-02-14 20:02:00Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1085,8 +1085,6 @@ extern int select_p_update_node_state (int index, uint16_t state) } } #endif - if(bluegene_layout_mode == LAYOUT_DYNAMIC) - remove_from_request_list(); return SLURM_ERROR; } diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 80b5435bcbf52f36d907a8906961f208a9c70672..6833d240862f86003e8ed20b9e6f90318785ce39 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -669,6 +669,7 @@ void set_options(const int argc, char **argv) {"no-kill", no_argument, 0, 'k'}, {"kill-command", optional_argument, 0, 'K'}, {"tasks", required_argument, 0, 'n'}, + {"ntasks", required_argument, 0, 'n'}, {"nodes", required_argument, 0, 'N'}, {"overcommit", no_argument, 0, 'O'}, {"partition", required_argument, 0, 'p'}, @@ -827,6 +828,9 @@ void set_options(const int argc, char **argv) #ifdef HAVE_BG info("\tThe nodelist option should only be used if\n" "\tthe block you are asking for can be created.\n" + "\tIt should also include all the midplanes you\n" + "\twant to use, partial lists may not\n" + "\twork correctly.\n" "\tPlease consult smap before using this option\n" "\tor your job may be stuck with no way to run."); #endif diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 82633fa422d69808094505e26d9f5a7e36fc6c18..d94a2466156301a0cb6b0f1fe98e530908299240 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -622,6 +622,7 @@ static struct option long_options[] = { {"immediate", no_argument, 0, 'I'}, {"job-name", required_argument, 0, 'J'}, {"no-kill", no_argument, 0, 'k'}, + {"tasks", required_argument, 0, 'n'}, {"ntasks", required_argument, 0, 'n'}, {"nodes", required_argument, 0, 'N'}, {"output", required_argument, 0, 'o'}, @@ -1154,6 +1155,9 @@ static void _set_options(int argc, char **argv) #ifdef HAVE_BG info("\tThe nodelist option should only be used if\n" "\tthe block you are asking for can be created.\n" + "\tIt should also include all the midplanes you\n" + "\twant to use, partial lists may not\n" + "\twork correctly.\n" "\tPlease consult smap before using this option\n" "\tor your job may be stuck with no way to run."); #endif diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index 2fd84f78dd8eaaae998df5162709fc14ef6be174..9ad0b2a77f610ebeb0a1dd77390b1db6c7fe15a9 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * sbatch.c - Submit a SLURM batch script. * - * $Id: sbatch.c 12856 2007-12-19 00:18:44Z jette $ + * $Id: sbatch.c 13231 2008-02-08 17:16:47Z jette $ ***************************************************************************** * Copyright (C) 2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -216,11 +216,12 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) struct passwd *pw = NULL; pw = getpwuid(opt.uid); if (pw != NULL) { - desc->environment = env_array_user_default(pw->pw_name, + desc->environment = env_array_user_default( + pw->pw_name, opt.get_user_env_time, opt.get_user_env_mode); - /* FIXME - should we abort if j->environment - * is NULL? */ + if (desc->environment == NULL) + exit(1); /* error already logged */ } } env_array_merge(&desc->environment, (const char **)environ); diff --git a/src/slaunch/opt.c b/src/slaunch/opt.c index 83658f41f5660ed313b3448f930820d696da8376..de1b25d64ee66f54fa7f4838ce1edee2584b6a35 100644 --- a/src/slaunch/opt.c +++ b/src/slaunch/opt.c @@ -1145,6 +1145,9 @@ void set_options(const int argc, char **argv) #ifdef HAVE_BG info("\tThe nodelist option should only be used if\n" "\tthe block you are asking for can be created.\n" + "\tIt should also include all the midplanes you\n" + "\twant to use, partial lists may not\n" + "\twork correctly.\n" "\tPlease consult smap before using this option\n" "\tor your job may be stuck with no way to run."); #endif diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 99769cf93c59635273668751667d40109ea30989..58d26e45f88dc9e69668d8d91d546c460679548c 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * controller.c - main control machine daemon for slurm - * $Id: controller.c 13077 2008-01-23 22:31:44Z da $ + * $Id: controller.c 13156 2008-02-01 17:43:01Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -842,8 +842,8 @@ static int _gold_mark_all_nodes_down(char *reason, time_t event_time) state_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (state_file, "/node_state"); if (stat(state_file, &stat_buf)) { - error("_gold_mark_all_nodes_down: could not stat(%s) to record " - "node down time", state_file); + debug("_gold_mark_all_nodes_down: could not stat(%s) " + "to record node down time", state_file); xfree(state_file); return rc; } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 5109f4cf6df2bba5ee8859cf7a70bc956f5d30c7..923670c928a352f0fc9227081fbe00bb4aeceef8 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3,7 +3,7 @@ * Note: there is a global job list (job_list), time stamp * (last_job_update), and hash table (job_hash) * - * $Id: job_mgr.c 13083 2008-01-24 16:28:23Z jette $ + * $Id: job_mgr.c 13176 2008-02-04 16:56:57Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1064,6 +1064,7 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test) } else if (job_ptr->batch_flag && job_ptr->details && (job_ptr->details->no_requeue == 0)) { uint16_t save_state; + srun_node_fail(job_ptr->job_id, node_name); info("requeue job %u due to failure of node %s", job_ptr->job_id, node_name); _set_job_prio(job_ptr); @@ -2741,7 +2742,7 @@ static int _validate_job_desc(job_desc_msg_t * job_desc_msg, int allocate, if (job_desc_msg->job_max_memory == NO_VAL) job_desc_msg->job_max_memory = 1; /* default 1MB mem per node */ if (job_desc_msg->job_min_tmp_disk == NO_VAL) - job_desc_msg->job_min_tmp_disk = 1;/* default 1MB disk per node */ + job_desc_msg->job_min_tmp_disk = 0;/* default 0MB disk per node */ return SLURM_SUCCESS; } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 7478bc6062aeed2917abe713367a82316bbad159..5f07cff6f16cfa4e0dd227d7faf6e527da0f0abb 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -4,7 +4,7 @@ * hash table (node_hash_table), time stamp (last_node_update) and * configuration list (config_list) * - * $Id: node_mgr.c 13068 2008-01-23 18:41:54Z jette $ + * $Id: node_mgr.c 13274 2008-02-14 21:32:07Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1919,7 +1919,8 @@ void node_not_resp (char *name, time_t msg_time) error ("node_not_resp unable to find node %s", name); return; } - error("Node %s not responding", node_ptr->name); + if ((node_ptr->node_state & NODE_STATE_BASE) != NODE_STATE_DOWN) + error("Node %s not responding", node_ptr->name); _node_not_resp(node_ptr, msg_time); #endif } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 16f7661360d4da19c34a06d64368893e596f2d99..b8903bc6280316183b046cfdf839d9fa78d8859a 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -2,7 +2,7 @@ * node_scheduler.c - select and allocated nodes to jobs * Note: there is a global node table (node_record_table_ptr) * - * $Id: node_scheduler.c 13061 2008-01-22 21:23:56Z da $ + * $Id: node_scheduler.c 13234 2008-02-08 22:13:39Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -300,18 +300,26 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap, uint32_t req_nodes, bool test_only) { bitstr_t *basemap; - int i, error_code = EINVAL, node_cnt = 0, prev_cnt = 0, set_cnt; - - basemap = bit_copy(bitmap); - if (basemap == NULL) - fatal("bit_copy malloc failure"); + int i, max_bit, error_code = EINVAL; + int node_cnt = 0, prev_cnt = 0, set_cnt; set_cnt = bit_set_count(bitmap); if ((set_cnt < min_nodes) || ((req_nodes > min_nodes) && (set_cnt < req_nodes))) return error_code; /* not usable */ + basemap = bit_copy(bitmap); + if (basemap == NULL) + fatal("bit_copy malloc failure"); + + max_bit = bit_size(bitmap) - 1; for (i=0; node_cnt<set_cnt; i++) { + /* if req_nodes, then start with those as a baseline */ + if (job_ptr->details && job_ptr->details->req_node_bitmap) { + bit_copybits(bitmap, job_ptr->details->req_node_bitmap); + } else { + bit_nclear(bitmap, 0, max_bit); + } node_cnt = _job_count_bitmap(basemap, bitmap, i); if ((node_cnt == 0) || (node_cnt == prev_cnt)) continue; /* nothing new to test */ @@ -332,7 +340,7 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap, /* * Set the bits in 'jobmap' that correspond to bits in the 'bitmap' - * that are running 'job_cnt' jobs or less, and clear the rest. + * that are running 'job_cnt' jobs or less. */ static int _job_count_bitmap(bitstr_t * bitmap, bitstr_t * jobmap, int job_cnt) @@ -345,8 +353,6 @@ _job_count_bitmap(bitstr_t * bitmap, bitstr_t * jobmap, int job_cnt) (node_record_table_ptr[i].run_job_cnt <= job_cnt)) { bit_set(jobmap, i); count++; - } else { - bit_clear(jobmap, i); } } return count; diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index 62ab07a80b6ff2323ad1bf63397e78ffe391f5be..4d31c895af885f1945209136ace689ca2f3c10f2 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -2,7 +2,7 @@ * partition_mgr.c - manage the partition information of slurm * Note: there is a global partition list (part_list) and * time stamp (last_part_update) - * $Id: partition_mgr.c 12452 2007-10-05 19:07:07Z da $ + * $Id: partition_mgr.c 13279 2008-02-15 00:14:16Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -835,12 +835,14 @@ int update_part(update_part_msg_t * part_desc) xfree(part_ptr->allow_uids); if ((strcasecmp(part_desc->allow_groups, "ALL") == 0) || (part_desc->allow_groups[0] == '\0')) { - info("update_part: setting allow_groups to ALL for partition %s", + info("update_part: setting allow_groups to ALL for " + "partition %s", part_desc->name); } else { part_ptr->allow_groups = part_desc->allow_groups; part_desc->allow_groups = NULL; - info("update_part: setting allow_groups to %s for partition %s", + info("update_part: setting allow_groups to %s for " + "partition %s", part_ptr->allow_groups, part_desc->name); part_ptr->allow_uids = _get_groups_members(part_ptr->allow_groups); @@ -852,8 +854,14 @@ int update_part(update_part_msg_t * part_desc) if (part_desc->nodes[0] == '\0') part_ptr->nodes = NULL; /* avoid empty string */ - else + else { + int i; part_ptr->nodes = xstrdup(part_desc->nodes); + for (i=0; part_ptr->nodes[i]; i++) { + if (isspace(part_ptr->nodes[i])) + part_ptr->nodes[i] = ','; + } + } error_code = _build_part_bitmap(part_ptr); if (error_code) { @@ -861,7 +869,7 @@ int update_part(update_part_msg_t * part_desc) part_ptr->nodes = backup_node_list; } else { info("update_part: setting nodes to %s for partition %s", - part_desc->nodes, part_desc->name); + part_ptr->nodes, part_desc->name); xfree(backup_node_list); } } diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index f5768a3fb9d687662d8013cd58777262aea2727f..e222d1e86bc1702bdd6d0ce0e700b08da3ec9389 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * proc_req.c - process incomming messages to slurmctld * - * $Id: proc_req.c 12413 2007-09-26 17:32:47Z jette $ + * $Id: proc_req.c 13237 2008-02-08 23:16:16Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -324,6 +324,7 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr) conf_ptr->job_credential_public_certificate = xstrdup(conf-> job_credential_public_certificate); conf_ptr->job_file_append = conf->job_file_append; + conf_ptr->get_env_timeout = conf->get_env_timeout; conf_ptr->kill_wait = conf->kill_wait; conf_ptr->mail_prog = xstrdup(conf->mail_prog); conf_ptr->max_job_cnt = conf->max_job_cnt; diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 43aa77897f19ea43a3e85d9bb5c09bafee175639..2da3cfdc5c5efa1e9d5e9392ffe2d2d573382f0c 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * step_mgr.c - manage the job step information of slurm - * $Id: step_mgr.c 12681 2007-11-26 18:56:25Z jette $ + * $Id: step_mgr.c 13155 2008-02-01 17:30:43Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -701,8 +701,16 @@ step_create(job_step_create_request_msg_t *step_specs, if (job_ptr == NULL) return ESLURM_INVALID_JOB_ID ; - if ((job_ptr->job_state == JOB_SUSPENDED) || IS_JOB_PENDING(job_ptr)) + if (job_ptr->job_state == JOB_SUSPENDED) return ESLURM_DISABLED; + if (IS_JOB_PENDING(job_ptr)) { + /* NOTE: LSF creates a job allocation for batch jobs. + * After the allocation has been made, LSF submits a + * job to run in that allocation (sbatch --jobid= ...). + * If that job is pending either LSF messed up or LSF is + * not being used. We have seen this problem with Moab. */ + return ESLURM_DUPLICATE_JOB_ID; + } if (batch_step) { info("user %u attempting to run batch script within " diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 0091fadc28f0b3c48fb7fa12437a9a1170f4aadf..6ac03c22d6365fe23e0c382eb9cf766db941dd7e 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 12647 2007-11-12 17:09:47Z da $ + * $Id: mgr.c 13229 2008-02-08 01:02:06Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -280,7 +280,7 @@ mgr_launch_batch_job_setup(batch_job_launch_msg_t *msg, slurm_addr *cli) } /* this is the new way of setting environment variables */ - env_array_for_batch_job(&job->env, msg); + env_array_for_batch_job(&job->env, msg, conf->node_name); /* this is the old way of setting environment variables */ job->envtp->nprocs = msg->nprocs; diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c index fdbb327e2b0362d00be71766366c0fe423605beb..aa5294f7138600b687b946b93ff77f240bd1e128 100644 --- a/src/smap/configure_functions.c +++ b/src/smap/configure_functions.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * configure_functions.c - Functions related to configure mode of smap. - * $Id: configure_functions.c 11985 2007-08-09 23:07:08Z da $ + * $Id: configure_functions.c 13270 2008-02-14 19:40:44Z da $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -213,7 +213,8 @@ static int _create_allocation(char *com, List allocated_blocks) request->nodecards = 0; request->quarters = 0; request->passthrough = false; - + request->avail_node_bitmap = NULL; + while(i<len) { if(!strncasecmp(com+i, "mesh", 4)) { request->conn_type=SELECT_MESH; @@ -855,7 +856,8 @@ static int _copy_allocation(char *com, List allocated_blocks) request->rotate_count= 0; request->elongate_count = 0; request->elongate_geos = list_create(NULL); - + request->avail_node_bitmap = NULL; + results_i = list_iterator_create(request->elongate_geos); while ((geo_ptr = list_next(results_i)) != NULL) { geo = xmalloc(sizeof(int)*3); @@ -1098,6 +1100,7 @@ static int _add_bg_record(blockreq_t *blockreq, List allocated_blocks) bp_count++; if(nodes[j] != ',') break; + j--; } j++; } diff --git a/src/smap/partition_functions.c b/src/smap/partition_functions.c index 2dd270b00740fe0bfd3aece334267731ad25059b..9e8c6743b9a86961221d96d6070903ae83ac0614 100644 --- a/src/smap/partition_functions.c +++ b/src/smap/partition_functions.c @@ -425,6 +425,7 @@ static int _marknodes(db2_block_info_t *block_ptr, int count) 0); if(block_ptr->nodes[j] != ',') break; + j--; } j++; } @@ -914,6 +915,7 @@ static int _make_nodelist(char *nodes, List nodelist) _addto_nodelist(nodelist, start, start); if(nodes[j] != ',') break; + j--; } j++; } diff --git a/src/srun/allocate.c b/src/srun/allocate.c index a06ea95966d6af57affd4ec8dfbbc3c5fa0b218f..b1abd672acf29f57b0179aaf9610c21a59464eab 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/srun/allocate.c - srun functions for managing node allocations - * $Id: allocate.c 12700 2007-11-27 23:39:24Z jette $ + * $Id: allocate.c 13231 2008-02-08 17:16:47Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -561,12 +561,12 @@ job_desc_msg_create_from_opts (char *script) struct passwd *pw = NULL; pw = getpwuid(opt.uid); if (pw != NULL) { - j->environment = - env_array_user_default(pw->pw_name, + j->environment = env_array_user_default( + pw->pw_name, opt.get_user_env_time, opt.get_user_env_mode); - /* FIXME - should we abort if j->environment - is NULL? */ + if (j->environment == NULL) + exit(1); /* error already logged */ } } env_array_merge(&j->environment, (const char **)environ); diff --git a/src/srun/opt.c b/src/srun/opt.c index fca5cf8ead2e5b4f7fee62b664205e17ab79d4db..4802c930b46fedf199a374d5f2ec36bb64079b7f 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.c - options processing for srun - * $Id: opt.c 12856 2007-12-19 00:18:44Z jette $ + * $Id: opt.c 13270 2008-02-14 19:40:44Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1721,6 +1721,9 @@ void set_options(const int argc, char **argv, int first) #ifdef HAVE_BG info("\tThe nodelist option should only be used if\n" "\tthe block you are asking for can be created.\n" + "\tIt should also include all the midplanes you\n" + "\twant to use, partial lists may not\n" + "\twork correctly.\n" "\tPlease consult smap before using this option\n" "\tor your job may be stuck with no way to run."); #endif @@ -2168,7 +2171,7 @@ static void _opt_args(int argc, char **argv) char *fullpath; char *cmd = remote_argv[0]; bool search_cwd = (opt.batch || opt.allocate); - int mode = (search_cwd) ? R_OK : R_OK | X_OK; + int mode = (search_cwd) ? R_OK : X_OK; if ((fullpath = _search_path(cmd, search_cwd, mode))) { xfree(remote_argv[0]); diff --git a/src/sview/part_info.c b/src/sview/part_info.c index 48c274ea1ea0330d8c7a3d84df225f31f8202a14..bf53ddb10457eaa20b2e1f98c3d130f553e8be5f 100644 --- a/src/sview/part_info.c +++ b/src/sview/part_info.c @@ -1390,6 +1390,7 @@ static List _create_part_info_list(partition_info_msg_t *part_info_ptr, ListIterator itr = NULL; hostlist_t hl; #ifdef HAVE_BG + int j; bg_info_record_t *bg_info_record = NULL; int node_scaling = part_info_ptr->partition_array[0].node_scaling; char *slurm_user = NULL; @@ -1451,9 +1452,9 @@ static List _create_part_info_list(partition_info_msg_t *part_info_ptr, node_ptr = _find_node(node_name, node_info_ptr); free(node_name); #ifdef HAVE_BG - for(i=0; i<3; i++) { + for(j=0; j<3; j++) { int norm = 0; - switch(i) { + switch(j) { case SVIEW_BG_IDLE_STATE: /* get the idle node count if * we don't have any error or