diff --git a/AUTHORS b/AUTHORS index 4811a692e77f512e9a29c4e5fe7b9c281271101c..e97f4e4aab91ff0223752e5db74a16c8704503ac 100644 --- a/AUTHORS +++ b/AUTHORS @@ -21,6 +21,7 @@ Jason King <king49(at)llnl.gov> Nancy Kritkausky <Nancy.Kritkausky(at)bull.com> Bernard Li <bli(at)bcgsc.ca> Puenlap Lee <Puen-Lap.Lee(at)bull.com> +Steven McDougall <smcdougall(at)sicortex.com> Donna Mecozzi <mecozzi1(at)llnl.gov> Chris Morrone <morrone2(at)llnl.gov> Bryan O'Sullivan <bos(at)pathscale.com> diff --git a/META b/META index 671d636cd58cfe0d09f2484a46f57ce055555a0f..7061ad1f76192f37cdfffd26364b72c61b2e928f 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 21 + Micro: 22 Minor: 2 Name: slurm Release: 1 Release_tags: - Version: 1.2.21 + Version: 1.2.22 diff --git a/NEWS b/NEWS index 512ca2c05aa310f9e68a60db8d439db4463dff0b..212ceda0a22478fbf8768e691b9c8be50abaf865 100644 --- a/NEWS +++ b/NEWS @@ -1,9 +1,33 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. -* Changes in SLURM 1.2.22 +* Changes in SLURM 1.2.23 ========================= +* Changes in SLURM 1.2.22 +========================= + -- In sched/wiki2, add support for MODIFYJOB option "MINSTARTTIME=<time>" + to modify a job's earliest start time. + -- In sbcast, fix bug with large files that caused sbcast to die. + -- In sched/wiki2, add support for COMMENT= option in STARTJOB and CANCELJOB + commands. + -- Avoid printing negative job run time in squeue due to clock skew. + -- In sched/wiki and sched/wiki2, add support for wiki.conf option + HidePartitionJobs (see man pages for details). + -- Update to srun/sbatch --get-user-env option logic (needed by Moab). + -- In slurmctld (for Moab) added job->details->reserved_resources field + to report resources that were kept in reserve for the job while it was + pending. + -- In sched/wiki (for Maui scheduler) report a pending job's node feature + requirements (from Miguel Roa, BSC). + -- Permit a user to change a pending job's TasksPerNode specification + using scontrol (from Miguel Roa, BSC). + -- Add support for node UP/DOWN event logging in jobacct/gold plugin. + WARNING: using the jobacct/gold plugin slows system startup; set the + MessageTimeout variable in slurm.conf to around 20 or more. + -- Added check at start of slurmctld for /tmp/slurm_gold_first; if present + and the gold plugin is in use, SLURM will record all nodes in the down + or drained state. * Changes in SLURM 1.2.21 ========================= @@ -2766,4 +2790,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. -$Id: NEWS 12878 2007-12-20 18:20:03Z jette $ +$Id: NEWS 13118 2008-01-29 19:09:00Z da $ diff --git a/contribs/env_cache_builder.c b/contribs/env_cache_builder.c index 5bcd9149cadc79a6fbac431c26f2e98dfccc5e0d..f13d7f8e799a3e1e9c121125f8a42b7c26b782ed 100644 --- a/contribs/env_cache_builder.c +++ b/contribs/env_cache_builder.c @@ -14,7 +14,7 @@ * * This program must execute as user root. ***************************************************************************** - * Copyright (C) 2007 The Regents of the University of California. + * Copyright (C) 2007-2008 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov>. * UCRL-CODE-226842.
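Note: the env_cache_builder.c hunks that follow rework _parse_line() to walk all seven colon-separated fields of an /etc/passwd entry and to skip accounts whose login shell is /sbin/nologin or /bin/false. A minimal standalone sketch of that parse, with demo values only (an illustration, not the patched function itself):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        /* user:password:uid:gid:gecos:home:shell
         * NB: strtok() collapses empty fields; the patched code
         * shares that caveat. */
        char line[] = "alice:x:1000:1000:Alice:/home/alice:/sbin/nologin";
        char *user  = strtok(line, ":");
        (void) strtok(NULL, ":");              /* password */
        int uid     = atoi(strtok(NULL, ":"));
        (void) strtok(NULL, ":");              /* gid */
        (void) strtok(NULL, ":");              /* gecos */
        (void) strtok(NULL, ":");              /* home */
        char *shell = strtok(NULL, ":");

        if (shell && ((strcmp(shell, "/sbin/nologin") == 0) ||
                      (strcmp(shell, "/bin/false") == 0)))
            printf("%s (uid %d): skipped, cannot login\n", user, uid);
        else
            printf("%s (uid %d): build environment cache\n", user, uid);
        return 0;
    }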
@@ -53,16 +53,18 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <strings.h> #include <unistd.h> +#include <sys/stat.h> #include <sys/types.h> #include <sys/wait.h> -#define DEBUG 0 +#define _DEBUG 0 #define SU_WAIT_MSEC 8000 static long int _build_cache(char *user_name, char *cache_dir); static int _get_cache_dir(char *buffer, int buf_size); -static void _parse_line(char *in_line, char **user_name, int *user_id); +static int _parse_line(char *in_line, char **user_name, int *user_id); main (int argc, char **argv) { @@ -80,19 +82,17 @@ main (int argc, char **argv) exit(1); strncat(cache_dir, "/env_cache", sizeof(cache_dir)); if (mkdir(cache_dir, 0500) && (errno != EEXIST)) { - printf("Could not create cache directory %s: %s", cache_dir, + printf("Could not create cache directory %s: %s\n", cache_dir, strerror(errno)); exit(1); } -#if DEBUG printf("cache_dir=%s\n", cache_dir); -#endif for (i=1; i<argc; i++) { delta_t = _build_cache(argv[i], cache_dir); -#if DEBUG - printf("user %-8s time %ld usec\n", argv[i], delta_t); -#endif + if (delta_t < ((SU_WAIT_MSEC * 0.8) * 1000)) + continue; + printf("WARNING: user %-8s time %ld usec\n", argv[i], delta_t); } if (i > 1) exit(0); @@ -104,52 +104,76 @@ main (int argc, char **argv) } while (fgets(in_line, sizeof(in_line), passwd_fd)) { - _parse_line(in_line, &user_name, &user_id); + if (_parse_line(in_line, &user_name, &user_id) < 0) + continue; if (user_id <= 100) continue; delta_t = _build_cache(user_name, cache_dir); -#if DEBUG if (delta_t < ((SU_WAIT_MSEC * 0.8) * 1000)) continue; - printf("user %-8s time %ld usec\n", user_name, delta_t); -#endif + printf("WARNING: user %-8s time %ld usec\n", user_name, delta_t); } fclose(passwd_fd); } -/* Given a line from /etc/passwd, return the user_name and user_id */ -static void _parse_line(char *in_line, char **user_name, int *user_id) +/* Given a line from /etc/passwd, sets the user_name and user_id + * RET -1 if user can't login, 0 otherwise */ +static int _parse_line(char *in_line, char **user_name, int *user_id) { - char *tok; - + char *tok, *shell; + + /* user name */ *user_name = strtok(in_line, ":"); + (void) strtok(NULL, ":"); + + /* uid */ tok = strtok(NULL, ":"); if (tok) *user_id = atoi(tok); else { - printf("error parsing /etc/passwd: %s\n", in_line); + printf("ERROR: parsing /etc/passwd: %s\n", in_line); *user_id = 0; } + + (void) strtok(NULL, ":"); /* gid */ + (void) strtok(NULL, ":"); /* name */ + (void) strtok(NULL, ":"); /* home */ + + shell = strtok(NULL, ":"); + if (shell) { + tok = strchr(shell, '\n'); + if (tok) + tok[0] = '\0'; + if ((strcmp(shell, "/sbin/nologin") == 0) || + (strcmp(shell, "/bin/false") == 0)) + return -1; + } + + return 0; + } /* For a given user_name, get his environment variable by executing * "su - <user_name> -c env" and store the result in * cache_dir/env_<user_name> - * Returns time to perform the operation in usec + * Returns time to perform the operation in usec or -1 on error */ static long int _build_cache(char *user_name, char *cache_dir) { - FILE *su, *cache; - char line[BUFSIZ], name[BUFSIZ], value[BUFSIZ], out_file[BUFSIZ]; + FILE *cache; + char *line, *last, out_file[BUFSIZ], buffer[64 * 1024]; char *starttoken = "XXXXSLURMSTARTPARSINGHEREXXXX"; char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX"; int fildes[2], found, fval, len, rc, timeleft; + int buf_read, buf_rem; pid_t child; struct timeval begin, now; struct pollfd ufds; long int delta_t; + gettimeofday(&begin, NULL); + if (pipe(fildes) < 0) { perror("pipe"); return -1; @@ 
-166,13 +190,14 @@ static long int _build_cache(char *user_name, char *cache_dir) dup2(fildes[1], 1); close(2); open("/dev/null", O_WRONLY); - snprintf(line, sizeof(line), - "echo; echo; echo; echo %s; env; echo %s", + snprintf(buffer, sizeof(buffer), + "/bin/echo; /bin/echo; /bin/echo; " + "/bin/echo %s; /bin/env; /bin/echo %s", starttoken, stoptoken); #ifdef LOAD_ENV_NO_LOGIN - execl("/bin/su", "su", user_name, "-c", line, NULL); + execl("/bin/su", "su", user_name, "-c", buffer, NULL); #else - execl("/bin/su", "su", "-", user_name, "-c", line, NULL); + execl("/bin/su", "su", "-", user_name, "-c", buffer, NULL); #endif exit(1); } @@ -180,30 +205,30 @@ close(fildes[1]); if ((fval = fcntl(fildes[0], F_GETFL, 0)) >= 0) fcntl(fildes[0], F_SETFL, fval | O_NONBLOCK); - su = fdopen(fildes[0], "r"); - gettimeofday(&begin, NULL); ufds.fd = fildes[0]; ufds.events = POLLIN; + ufds.revents = 0; - /* First look for the start token in the output */ - len = strlen(starttoken); + /* Read all of the output from /bin/su into buffer */ found = 0; - while (!found) { + buf_read = 0; + bzero(buffer, sizeof(buffer)); + while (1) { gettimeofday(&now, NULL); timeleft = SU_WAIT_MSEC * 10; timeleft -= (now.tv_sec - begin.tv_sec) * 1000; timeleft -= (now.tv_usec - begin.tv_usec) / 1000; if (timeleft <= 0) { -#if DEBUG - printf("timeout1\n"); +#if _DEBUG + printf("timeout1 for %s\n", user_name); #endif break; } if ((rc = poll(&ufds, 1, timeleft)) <= 0) { if (rc == 0) { -#if DEBUG - printf("timeout2\n"); +#if _DEBUG + printf("timeout2 for %s\n", user_name); #endif break; } @@ -213,88 +238,97 @@ if ((errno == EINTR) || (errno == EAGAIN)) continue; perror("poll"); break; } if (!(ufds.revents & POLLIN)) { - perror("POLLERR|POLLHUP"); + if (ufds.revents & POLLHUP) { /* EOF */ +#if _DEBUG + printf("POLLHUP for %s\n", user_name); +#endif + found = 1; /* success */ + } else if (ufds.revents & POLLERR) { + printf("ERROR: POLLERR for %s\n", user_name); + } else { + printf("ERROR: poll() revents=%d for %s\n", + ufds.revents, user_name); + } break; } - while (fgets(line, BUFSIZ, su)) { - if (!strncmp(line, starttoken, len)) { - found = 1; - break; - } + buf_rem = sizeof(buffer) - buf_read; + if (buf_rem == 0) { + printf("ERROR: buffer overflow for %s\n", user_name); + break; + } + rc = read(fildes[0], &buffer[buf_read], buf_rem); + if (rc > 0) + buf_read += rc; + else if (rc == 0) { /* EOF */ +#if _DEBUG + printf("EOF for %s\n", user_name); +#endif + found = 1; /* success */ + break; + } else { /* error */ + perror("read"); + break; } } + close(fildes[0]); if (!found) { - printf("Failed to get current user environment variables " - "for %s\n", user_name); - close(fildes[0]); - gettimeofday(&now, NULL); - delta_t = now.tv_sec - begin.tv_sec * 1000000; - delta_t += now.tv_usec - begin.tv_usec; - if (delta_t < (SU_WAIT_MSEC * 1000)) - return (SU_WAIT_MSEC * 1000); - return delta_t; + printf("ERROR: Failed to load current user environment " + "variables for %s\n", user_name); + return -1; + } + + /* First look for the start token in the output */ + len = strlen(starttoken); + found = 0; + line = strtok_r(buffer, "\n", &last); + while (!found && line) { + if (!strncmp(line, starttoken, len)) { + found = 1; + break; + } + line = strtok_r(NULL, "\n", &last); + } + if (!found) { + printf("ERROR: Failed to get current user environment " + "variables for %s\n", user_name); + return -1; } snprintf(out_file, sizeof(out_file), "%s/%s", cache_dir, user_name); cache = fopen(out_file,
"w"); if (!cache) { - printf("Could not create cache file %s: %s\n", out_file, - strerror(errno)); + printf("ERROR: Could not create cache file %s for %s: %s\n", + out_file, user_name, strerror(errno)); + return -1; } + chmod(out_file, 0600); + /* Process environment variables until we find the stop token */ len = strlen(stoptoken); found = 0; - while (!found && cache) { - gettimeofday(&now, NULL); - timeleft = SU_WAIT_MSEC * 10; - timeleft -= (now.tv_sec - begin.tv_sec) * 1000; - timeleft -= (now.tv_usec - begin.tv_usec) / 1000; - if (timeleft <= 0) { -#if DEBUG - printf("timeout3\n"); -#endif - break; - } - if ((rc = poll(&ufds, 1, timeleft)) <= 0) { - if (rc == 0) { -#if DEBUG - printf("timeout4\n"); -#endif - break; - } - if ((errno == EINTR) || (errno == EAGAIN)) - continue; - perror("poll"); - break; - } - if (!(ufds.revents & POLLIN)) { - perror("POLLERR|POLLHUP"); - break; - } - /* stop at the line containing the stoptoken string */ - if ((fgets(line, BUFSIZ, su) == 0) || - (!strncmp(line, stoptoken, len))) { + line = strtok_r(NULL, "\n", &last); + while (!found && line) { + if (!strncmp(line, stoptoken, len)) { found = 1; break; } - - if (fputs(line, cache) == EOF) { - printf("Could not write cache file %s: %s\n", - out_file, strerror(errno)); + if (fprintf(cache, "%s\n",line) < 0) { + printf("ERROR: Could not write cache file %s " + "for %s: %s\n", + out_file, user_name, strerror(errno)); found = 1; /* quit now */ } + line = strtok_r(NULL, "\n", &last); } - close(fildes[0]); - if (cache) - fclose(cache); + fclose(cache); waitpid(-1, NULL, WNOHANG); gettimeofday(&now, NULL); delta_t = (now.tv_sec - begin.tv_sec) * 1000000; delta_t += now.tv_usec - begin.tv_usec; if (!found) { - printf("Failed to get current user environment variables " - "for %s\n", user_name); + printf("ERROR: Failed to write all user environment " + "variables for %s\n", user_name); if (delta_t < (SU_WAIT_MSEC * 1000)) return (SU_WAIT_MSEC * 1000); } @@ -349,7 +383,7 @@ static int _get_cache_dir(char *buffer, int buf_size) } close(fildes[0]); if (!buffer[0]) { - printf("Failed to get StateSaveLocation\n"); + printf("ERROR: Failed to get StateSaveLocation\n"); close(fildes[0]); return -1; } diff --git a/doc/html/maui.shtml b/doc/html/maui.shtml index a76aef47205db4df9293de784b08df67b613b115..f630980dc64b6e8e8a7040b7a6737e7882af9ea3 100644 --- a/doc/html/maui.shtml +++ b/doc/html/maui.shtml @@ -37,7 +37,7 @@ maui-3.2.6p9/maui.cfg.dist to maui.conf). Add the following configuration paramters to maui.conf:</p> <pre> RMCFG[host] TYPE=WIKI -RMPORT 7321 # or whatever you choose as a port +RMPORT 7321 # selected port RMHOST host RMAUTHTYPE[host] NONE </pre> @@ -108,22 +108,40 @@ includes a description of keywords presently only supported by the sched/wiki2 plugin for use with the Moab Scheduler.</p> -<p>Only two wiki.conf parameters are used by the sched/wiki plugin: -<b>AuthKey</b> should match the key used to configure -Maui at build time and -<b>ExcludePartitions</b> can be used for SLURM to directly -schedule jobs in select partitions without Maui control. -Note that SLURM's wiki plugin does not include a mechanism -to submit new jobs, so even without this key nobody could -run jobs as another user. -Note that Maui's use of an authentication key with SLURM -is still under development. 
-If that support is not in place and SLURM is configured -with an <b>AuthKey</b> then communications between Maui -and SLURM will fail and the SlurmctldLog file will contain -errors of this sort: <i>error: wiki: request lacks AUTH=</i>. -If you see this error, remove <b>AuthKey</b> from SLURM's -configuration.</p> +<p>The wiki.conf keywords currently supported by Maui include:</p> + +<p><b>AuthKey</b> is a DES based encryption key used to sign +communications between SLURM and Maui or Moab. +Use of this key is essential to ensure that a user +cannot build his own program to cancel other users' jobs in +SLURM. +This should be no more than a 32-bit unsigned integer and must match +the encryption key in Maui (<i>--with-key</i> on the +configure line) or Moab (<i>KEY</i> parameter in the +<i>moab-private.cfg</i> file). +Note that SLURM's wiki plugin does not include a mechanism +to submit new jobs, so even without this key nobody could +run jobs as another user.</p> + +<p><b>ExcludePartitions</b> is used to identify partitions +whose jobs are to be scheduled directly by SLURM rather +than Maui. +These jobs will be scheduled on a First-Come-First-Served +basis. +This may provide faster response times than Maui scheduling. +Maui will account for and report the jobs, but their initiation +will be outside of Maui's control. +Note that Maui controls for resource reservation, fair share +scheduling, etc. will not apply to the initiation of these jobs. +If more than one partition is to be scheduled directly by +Slurm, use a comma separator between their names.</p> + +<p><b>HidePartitionJobs</b> identifies partitions whose jobs are not +to be reported to Maui. +These jobs will not be accounted for or otherwise visible to Maui. +Any partitions listed here must also be listed in <b>ExcludePartitions</b>. +If more than one partition is to have its jobs hidden, use a comma +separator between their names.</p> <p>Here is a sample <i>wiki.conf</i> file</p> <pre> @@ -133,8 +151,10 @@ configuration.</p> # Matches Maui's --with-key configuration parameter AuthKey=42 # -# SLURM to directly schedule "debug" partition +# SLURM to directly schedule "debug" partition +# and hide the jobs from Maui ExcludePartitions=debug +HidePartitionJobs=debug </pre> </p> diff --git a/doc/html/moab.shtml b/doc/html/moab.shtml index f0d761161f114514ba0cbb9dbc30285e66fadd38..30f40b30a524a00d0ffe0e752025d28565dc01cd 100644 --- a/doc/html/moab.shtml +++ b/doc/html/moab.shtml @@ -101,6 +101,13 @@ scheduling, etc. will not apply to the initiation of these jobs. If more than one partition is to be scheduled directly by Slurm, use a comma separator between their names.</p> +<p><b>HidePartitionJobs</b> identifies partitions whose jobs are not +to be reported to Moab. +These jobs will not be accounted for or otherwise visible to Moab. +Any partitions listed here must also be listed in <b>ExcludePartitions</b>. +If more than one partition is to have its jobs hidden, use a comma +separator between their names.</p> + <p><b>HostFormat</b> controls the format of job task lists built by Slurm and reported to Moab.
The default value is "0", for which each host name is listed @@ -135,8 +142,10 @@ Possible values are "hold" and "run" with "hold" being the default.</p> # Matches KEY in moab-private.cfg AuthKey=123456789 # -# Slurm directly schedules jobs in the debug partitions +# SLURM to directly schedule "debug" partition +# and hide the jobs from Moab ExcludePartitions=debug +HidePartitionJobs=debug # # Have Moab control job scheduling JobPriority=hold @@ -146,8 +155,8 @@ EPort=15017 # Moab event notification host, where the Moab daemon runs #EHost=tux0 # -# Moab event notification throttle, matches JOBAGGREGATIONTIME -# in moab.cfg (integer value in seconds) +# Moab event notification throttle, +# matches JOBAGGREGATIONTIME in moab.cfg (seconds) JobAggregationTime=15 </pre> </p> diff --git a/doc/html/team.shtml b/doc/html/team.shtml index a2b16f1ff8e312350081d9480c225b2905598e87..74035f1e8b1d78d9cd29b82f651ac55075341745 100644 --- a/doc/html/team.shtml +++ b/doc/html/team.shtml @@ -40,6 +40,7 @@ <li>Nancy Kritkausky (Bull)</li> <li>Puenlap Lee (Bull)</li> <li>Bernard Li (Genome Sciences Centre, Canada)</li> +<li>Steven McDougall (SiCortex)</li> <li>Donna Mecozzi (LLNL)</li> <li>Pere Munt (Barcelona Supercomputer Center, Spain)</li> <li>Bryan O'Sullivan (Pathscale)</li> diff --git a/doc/man/man1/squeue.1 b/doc/man/man1/squeue.1 index b8f887ee8219a91b6714d5fd1ae5a1f0dd8bfd7b..99a5ff82113009e3cd30735d256834a575945175 100644 --- a/doc/man/man1/squeue.1 +++ b/doc/man/man1/squeue.1 @@ -58,26 +58,37 @@ A node_name of \fBlocalhost\fR is mapped to the current host name. .TP \fB\-o <output_format>\fR, \fB\-\-format=<output_format>\fR -Specify the information to be displayed. -The default format for jobs is -If the \fB\-\-long\fR option is specified, the default job format is -Format strings used internally by \fBsqueue\fR when running with -various options are +Specify the information to be displayed, its size and position +(right or left justified). +The default formats with various options are .RS .TP 15 -.I "default" +\fIdefault\fR "%.7i %.9P %.8j %.8u %.2t %.9M %.6D %R" .TP -.I "\-l, \-\-long" +\fI\-l, \-\-long\fR "%.7i %.9P %.8j %.8u %.8T %.9M %.9l %.6D %R" .TP -.I "\-s, \-\-steps" +\fI\-s, \-\-steps\fR "%10i %.8j %.9P %.8u %.9M %N" .RE .IP -The field specifications available include: +The format of each field is "%[.][size]type". +.RS +.TP 8 +\fIsize\fR +is the minimum field size. +If no size is specified, whatever is needed to print the information will be used. +.TP +\fI .\fR +indicates the output should be right justified. +By default, output is left justified. +.RE + +.IP +Valid \fItype\fR specifications include: .RS .TP 4 @@ -158,6 +169,8 @@ Time used by the job or job step in days\-hours:minutes:seconds. The days and hours are printed only as needed. For job steps this field shows the elapsed time since execution began and thus will be inaccurate for job steps which have been suspended. +Clock skew between nodes in the cluster will cause the time to be inaccurate. +If the time is obviously wrong (e.g. negative), it displays as "INVALID".
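Note: on the "INVALID" display documented above — a run time computed as "now minus start" goes negative when the node that stamped the start time has a clock ahead of the host running squeue. A compilable sketch of that guard (hypothetical names, not the actual squeue source):

    #include <stdio.h>
    #include <time.h>

    /* Print elapsed time, flagging clock skew instead of a negative value. */
    static void print_run_time(time_t start_time)
    {
        time_t run = time(NULL) - start_time;

        if (run < 0)                    /* start stamped by a faster clock */
            printf("INVALID\n");
        else
            printf("%ld:%02ld\n", (long)(run / 60), (long)(run % 60));
    }

    int main(void)
    {
        print_run_time(time(NULL) - 90);   /* prints 1:30 */
        print_run_time(time(NULL) + 60);   /* prints INVALID */
        return 0;
    }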
.TP \fB%n\fR List of node names (or base partitions on BlueGene systems) explicitly @@ -239,12 +252,6 @@ Number of requested threads per core .TP \fB%z\fR Extended processor information: number of requested sockets, cores, threads (S:C:T) per node -.TP -\fB%.<*>\fR -right justification of the field -.TP -\fB%<Number><*>\fR -size of field .RE .TP diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index 54eb34120a23b321c39755dcbc8c2db5bc518ba9..15bf0201312f86b53a49cc6e9d8dab45908e10f9 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,4 +1,4 @@ -\." $Id: srun.1 12842 2007-12-17 19:33:54Z jette $ +\." $Id: srun.1 13066 2008-01-23 16:34:55Z jette $ .\" .TH SRUN "1" "July 2007" "srun 1.2" "slurm components" @@ -1471,7 +1471,7 @@ These tasks are initiated outside of SLURM's monitoring or control. SLURM's epilog should be configured to purge these tasks when the job's allocation is relinquished. -See \fIhttps://computing.linux.gov/linux/slurm/quickstart.html#mpi\fR +See \fIhttps://computing.llnl.gov/linux/slurm/quickstart.html#mpi\fR for more information on use of these various MPI implementations with SLURM. diff --git a/doc/man/man5/wiki.conf.5 b/doc/man/man5/wiki.conf.5 index 4690072b7c41d58eb2f25db6512026a27ac198d2..c84957fdff535b00bd1d303dda802b9b54437660 100644 --- a/doc/man/man5/wiki.conf.5 +++ b/doc/man/man5/wiki.conf.5 @@ -70,6 +70,16 @@ If more than one partition is to be scheduled directly by SLURM, use a comma separator between their names. This may provide faster response times than Moab/Maui scheduling. +.TP +\fBHidePartitionJobs\fR +Identifies partitions whose jobs are not to be reported to Moab/Maui. +These jobs will not be accounted for or otherwise visible to Moab/Maui. +Any partitions listed here must also be listed in \fBExcludePartitions\fR. +This only affects jobs which are submitted using SLURM commands (i.e. +\fIsrun\fR, \fIsalloc\fR or \fIsbatch\fR, NOT \fImsub\fR from Moab). +If more than one partition is to have its jobs hidden, use a comma +separator between their names. + .TP \fBHostFormat\fR Controls the format of host lists exchanged between SLURM and Moab. diff --git a/slurm.spec b/slurm.spec index c1351af045b8010c0bf73cfc59ca9d87c7e8329a..de1d4ded7900b27aa0c0872f0c930fdfcc8aad41 100644 --- a/slurm.spec +++ b/slurm.spec @@ -1,4 +1,4 @@ -# $Id: slurm.spec 12767 2007-12-03 17:00:12Z jette $ +# $Id: slurm.spec 13075 2008-01-23 20:39:30Z da $ # # Note that this package is not relocatable @@ -60,14 +60,14 @@ %endif Name: slurm -Version: 1.2.21 +Version: 1.2.22 Release: 1 Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.2.21.tar.bz2 +Source: slurm-1.2.22.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ BuildRequires: openssl-devel >= 0.9.6 openssl >= 0.9.6 @@ -211,7 +211,7 @@ SLURM process tracking plugin for SGI job containers.
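Note: combining the two wiki.conf keywords documented above — HidePartitionJobs must name a subset of ExcludePartitions. A hypothetical wiki.conf fragment (partition names are examples only):

    # SLURM schedules these two partitions itself, first-come-first-served
    ExcludePartitions=debug,test
    #
    # ...and additionally keeps "debug" jobs invisible to Moab/Maui
    HidePartitionJobs=debug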
############################################################################# %prep -%setup -n slurm-1.2.21 +%setup -n slurm-1.2.22 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ @@ -357,6 +357,7 @@ rm -rf $RPM_BUILD_ROOT %dir %{_libdir}/slurm %{_libdir}/slurm/checkpoint_none.so %{_libdir}/slurm/checkpoint_ompi.so +%{_libdir}/slurm/jobacct_gold.so %{_libdir}/slurm/jobacct_linux.so %{_libdir}/slurm/jobacct_none.so %{_libdir}/slurm/jobcomp_none.so diff --git a/src/common/env.c b/src/common/env.c index c96fee623c91faf60b76b813fce05ecb36252281..741152a0c14d7d976536e588c0247a7144c241a9 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/common/env.c - add an environment variable to environment vector - * $Id: env.c 12884 2007-12-21 00:41:34Z jette $ + * $Id: env.c 12970 2008-01-07 20:16:53Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1280,15 +1280,14 @@ char **_load_env_cache(const char *username) */ char **env_array_user_default(const char *username, int timeout, int mode) { - FILE *su; - char line[ENV_BUFSIZE]; - char name[ENV_BUFSIZE]; - char value[ENV_BUFSIZE]; + char *line, *last, name[128], value[ENV_BUFSIZE]; + char buffer[ENV_BUFSIZE]; char **env = NULL; char *starttoken = "XXXXSLURMSTARTPARSINGHEREXXXX"; char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX"; char cmdstr[256]; int fildes[2], found, fval, len, rc, timeleft; + int buf_read, buf_rem; pid_t child; struct timeval begin, now; struct pollfd ufds; @@ -1315,7 +1314,8 @@ char **env_array_user_default(const char *username, int timeout, int mode) close(2); open("/dev/null", O_WRONLY); snprintf(cmdstr, sizeof(cmdstr), - "/bin/echo; /bin/echo; /bin/echo; /bin/echo %s; /bin/env; /bin/echo %s", + "/bin/echo; /bin/echo; /bin/echo; " + "/bin/echo %s; /bin/env; /bin/echo %s", starttoken, stoptoken); if (mode == 1) execl("/bin/su", "su", username, "-c", cmdstr, NULL); @@ -1334,25 +1334,27 @@ char **env_array_user_default(const char *username, int timeout, int mode) close(fildes[1]); if ((fval = fcntl(fildes[0], F_GETFL, 0)) >= 0) fcntl(fildes[0], F_SETFL, fval | O_NONBLOCK); - su= fdopen(fildes[0], "r"); gettimeofday(&begin, NULL); ufds.fd = fildes[0]; ufds.events = POLLIN; - /* First look for the start token in the output */ - len = strlen(starttoken); + /* Read all of the output from /bin/su into buffer */ found = 0; - while (!found) { + buf_read = 0; + bzero(buffer, sizeof(buffer)); + while (1) { gettimeofday(&now, NULL); - if (timeout > 0) + if (timeout) timeleft = timeout * 1000; else timeleft = SU_WAIT_MSEC; timeleft -= (now.tv_sec - begin.tv_sec) * 1000; timeleft -= (now.tv_usec - begin.tv_usec) / 1000; - if (timeleft <= 0) + if (timeleft <= 0) { + verbose("timeout waiting for /bin/su to complete"); break; + } if ((rc = poll(&ufds, 1, timeleft)) <= 0) { if (rc == 0) { verbose("timeout waiting for /bin/su to complete"); @@ -1360,63 +1362,76 @@ char **env_array_user_default(const char *username, int timeout, int mode) } if ((errno == EINTR) || (errno == EAGAIN)) continue; - error("poll: %m"); + error("poll(): %m"); break; } - if (!(ufds.revents & POLLIN)) - break; - while (fgets(line, ENV_BUFSIZE, su)) { - if (!strncmp(line, starttoken, len)) { - found = 1; - break; + if (!(ufds.revents & POLLIN)) { + if (ufds.revents & 
POLLHUP) { /* EOF */ + found = 1; /* success */ + } else if (ufds.revents & POLLERR) { + error("POLLERR"); + } else { + error("poll() revents=%d", ufds.revents); } + break; + } + buf_rem = sizeof(buffer) - buf_read; + if (buf_rem == 0) { + error("buffer overflow loading env vars"); + break; + } + rc = read(fildes[0], &buffer[buf_read], buf_rem); + if (rc > 0) + buf_read += rc; + else if (rc == 0) { /* EOF */ + found = 1; /* success */ + break; + } else { /* error */ + error("read(env pipe): %m"); + break; } } + close(fildes[0]); + if (!found) { + error("Failed to load current user environment variables"); + return _load_env_cache(username); + } + + /* First look for the start token in the output */ + len = strlen(starttoken); + found = 0; + line = strtok_r(buffer, "\n", &last); + while (!found && line) { + if (!strncmp(line, starttoken, len)) { + found = 1; + break; + } + line = strtok_r(NULL, "\n", &last); + } if (!found) { error("Failed to get current user environment variables"); - close(fildes[0]); return _load_env_cache(username); } - /* Now read in the environment variable strings. */ - env = env_array_create(); + /* Process environment variables until we find the stop token */ len = strlen(stoptoken); found = 0; - while (!found) { - gettimeofday(&now, NULL); - if (timeout > 0) - timeleft = timeout * 1000; - else - timeleft = SU_WAIT_MSEC; - timeleft -= (now.tv_sec - begin.tv_sec) * 1000; - timeleft -= (now.tv_usec - begin.tv_usec) / 1000; - if (timeleft <= 0) - break; - if ((rc = poll(&ufds, 1, timeleft)) <= 0) { - if (rc == 0) { - verbose("timeout waiting for /bin/su to complete"); - break; - } - if ((errno == EINTR) || (errno == EAGAIN)) - continue; - error("poll: %m"); - break; - } - /* stop at the line containing the stoptoken string */ - if (!(ufds.revents & POLLIN)) - break; - if ((fgets(line, ENV_BUFSIZE, su) == 0) || - (!strncmp(line, stoptoken, len))) { + env = env_array_create(); + line = strtok_r(NULL, "\n", &last); + while (!found && line) { + if (!strncmp(line, stoptoken, len)) { found = 1; break; } - - _strip_cr_nl(line); - _env_array_entry_splitter(line, name, ENV_BUFSIZE, value, - ENV_BUFSIZE); - env_array_overwrite(&env, name, value); + if (_env_array_entry_splitter(line, name, sizeof(name), + value, sizeof(value))) + env_array_overwrite(&env, name, value); + line = strtok_r(NULL, "\n", &last); + } + if (!found) { + error("Failed to get all user environment variables"); + return _load_env_cache(username); } - close(fildes[0]); return env; } diff --git a/src/common/slurm_jobacct.c b/src/common/slurm_jobacct.c index 1f8d74d9b40ee975e3b127a91f37321574049f6f..41a55ff0439c30985801b9406cec81d4905a41b3 100644 --- a/src/common/slurm_jobacct.c +++ b/src/common/slurm_jobacct.c @@ -103,6 +103,12 @@ typedef struct slurm_jobacct_ops { jobacctinfo_t *(*jobacct_remove_task)(pid_t pid); void (*jobacct_suspend_poll) (); void (*jobacct_resume_poll) (); + int (*jobacct_node_down) (struct node_record *node_ptr, + time_t event_time, char *reason); + int (*jobacct_node_up) (struct node_record *node_ptr, + time_t event_time); + int (*jobacct_cluster_procs) (uint32_t procs, time_t event_time); + } slurm_jobacct_ops_t; /* @@ -207,7 +213,10 @@ _slurm_jobacct_get_ops( slurm_jobacct_context_t *c ) "jobacct_p_stat_task", "jobacct_p_remove_task", "jobacct_p_suspend_poll", - "jobacct_p_resume_poll" + "jobacct_p_resume_poll", + "jobacct_p_node_down", + "jobacct_p_node_up", + "jobacct_p_cluster_procs" }; int n_syms = sizeof( syms ) / sizeof( char * ); int rc = 0; @@ -639,3 +648,45 @@ extern void
jobacct_g_resume_poll() return; } +extern int jobacct_g_node_down(struct node_record *node_ptr, time_t event_time, + char *reason) +{ + int retval = SLURM_SUCCESS; + if (_slurm_jobacct_init() < 0) + return SLURM_ERROR; + + slurm_mutex_lock( &g_jobacct_context_lock ); + if ( g_jobacct_context ) + retval = (*(g_jobacct_context->ops.jobacct_node_down)) + (node_ptr, event_time, reason); + slurm_mutex_unlock( &g_jobacct_context_lock ); + return retval; +} + +extern int jobacct_g_node_up(struct node_record *node_ptr, time_t event_time) +{ + int retval = SLURM_SUCCESS; + if (_slurm_jobacct_init() < 0) + return SLURM_ERROR; + + slurm_mutex_lock( &g_jobacct_context_lock ); + if ( g_jobacct_context ) + retval = (*(g_jobacct_context->ops.jobacct_node_up)) + (node_ptr, event_time); + slurm_mutex_unlock( &g_jobacct_context_lock ); + return retval; +} + +extern int jobacct_g_cluster_procs(uint32_t procs, time_t event_time) +{ + int retval = SLURM_SUCCESS; + if (_slurm_jobacct_init() < 0) + return SLURM_ERROR; + + slurm_mutex_lock( &g_jobacct_context_lock ); + if ( g_jobacct_context ) + retval = (*(g_jobacct_context->ops.jobacct_cluster_procs)) + (procs, event_time); + slurm_mutex_unlock( &g_jobacct_context_lock ); + return retval; +} diff --git a/src/common/slurm_jobacct.h b/src/common/slurm_jobacct.h index ec9ac6dace49cc2c195eb095e149551ff8775237..a7c4ac55e139746c633511b2f44bb7f9f73d83af 100644 --- a/src/common/slurm_jobacct.h +++ b/src/common/slurm_jobacct.h @@ -102,5 +102,15 @@ extern jobacctinfo_t *jobacct_g_remove_task(pid_t pid); extern void jobacct_g_suspend_poll(); extern void jobacct_g_resume_poll(); +/* functions only to be used by the gold plugin in 1.2 since we had to + * make some mods to get the plugin to work with node states also + */ +extern int jobacct_g_node_down(struct node_record *node_ptr, time_t event_time, + char *reason); +extern int jobacct_g_node_up(struct node_record *node_ptr, time_t event_time); +extern int jobacct_g_cluster_procs(uint32_t procs, time_t event_time); + + + #endif /*__SLURM_JOBACCT_H__*/ diff --git a/src/plugins/jobacct/aix/jobacct_aix.c b/src/plugins/jobacct/aix/jobacct_aix.c index 4d2d7ac8c57706fd14900ef680424b1123f3a08f..98b6545e743f232a6445c08674016f5a57a869f0 100644 --- a/src/plugins/jobacct/aix/jobacct_aix.c +++ b/src/plugins/jobacct/aix/jobacct_aix.c @@ -310,6 +310,20 @@ void jobacct_p_resume_poll() common_resume_poll(); } +extern int jobacct_p_node_down(struct node_record *node_ptr, + time_t event_time, char *reason) +{ + return SLURM_SUCCESS; +} +extern int jobacct_p_node_up(struct node_record *node_ptr, time_t event_time) +{ + return SLURM_SUCCESS; +} +extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) +{ + return SLURM_SUCCESS; +} + #ifdef HAVE_AIX /* diff --git a/src/plugins/jobacct/gold/gold_interface.c b/src/plugins/jobacct/gold/gold_interface.c index fe94846a82a81d8941acb5f245bb64205eb0fde4..8ca66f540e88b00f9ee235c7d8eb33ce6a55b64d 100644 --- a/src/plugins/jobacct/gold/gold_interface.c +++ b/src/plugins/jobacct/gold/gold_interface.c @@ -47,12 +47,12 @@ #define MAX_RETRY 5 -static slurm_fd gold_fd; // gold connection static char *gold_machine = NULL; static char *gold_key = NULL; static char *gold_host = NULL; static uint16_t gold_port = 0; static int gold_init = 0; +pthread_mutex_t gold_mutex = PTHREAD_MUTEX_INITIALIZER; static char *_get_return_value(char *gold_msg, int *i) { @@ -129,17 +129,18 @@ static gold_response_entry_t *_create_response_entry(char *object, return resp_entry; } -static int _start_communication() 
+static slurm_fd _start_communication() { static slurm_addr gold_addr; static int gold_addr_set = 0; char *init_msg = "POST /SSSRMAP3 HTTP/1.1\r\nContent-Type: text/xml; charset=\"utf-8\"\r\nTransfer-Encoding: chunked\r\n\r\n"; int rc = 0; - + slurm_fd gold_fd = 0; + if(!gold_init) { error("start_gold_communication: " "need to run setup_gold_info before this"); - return SLURM_ERROR; + return 0; } if(!gold_addr_set) { @@ -149,7 +150,7 @@ static int _start_communication() if ((gold_fd = slurm_open_msg_conn(&gold_addr)) < 0) { error("start_gold_communication to %s: %m", gold_host); - return SLURM_ERROR; + return 0; } debug3("Connected to %s(%d)", gold_host, gold_port); @@ -159,12 +160,12 @@ if (rc < 0) { error("_slurm_send_timeout: %m"); - return SLURM_ERROR; + return 0; } - return SLURM_SUCCESS; + return gold_fd; } -static int _end_communication() +static int _end_communication(slurm_fd gold_fd) { int rc = SLURM_SUCCESS; int retry = 0; @@ -208,7 +209,7 @@ extern int init_gold(char *machine, char *keyfile, char *host, uint16_t port) /* Close the file */ close(fp); - info("got the token as %s\n", key); + //debug4("got the token as %s\n", key); gold_machine = xstrdup(machine); gold_key = xstrdup(key); gold_host = xstrdup(host); @@ -273,11 +274,13 @@ } extern int gold_request_add_condition(gold_request_t *gold_request, - char *name, char *value) + char *name, char *value, + gold_operator_t op) { gold_name_value_t *name_val = xmalloc(sizeof(gold_name_value_t)); name_val->name = xstrdup(name); name_val->value = xstrdup(value); + name_val->op = op; list_push(gold_request->conditions, name_val); return SLURM_SUCCESS; @@ -309,6 +312,7 @@ gold_name_value_t *name_val = NULL; ListIterator itr = NULL; int rc = 0, i = 0; + slurm_fd gold_fd = 0; if(!gold_init) { error("get_gold_response: " @@ -339,12 +343,16 @@ case GOLD_OBJECT_JOB: object = GOLD_OBJECT_JOB_STR; break; + case GOLD_OBJECT_EVENT: + object = GOLD_OBJECT_EVENT_STR; + break; case GOLD_OBJECT_ROLEUSER: object = GOLD_OBJECT_ROLEUSER_STR; break; default: error("get_gold_response: " "unsupported object %d", gold_request->object); + return NULL; } switch(gold_request->action) { @@ -386,8 +394,37 @@ itr = list_iterator_create(gold_request->conditions); while((name_val = list_next(itr))) { - xstrfmtcat(innerds, "<Where name=\"%s\">%s</Where>", - name_val->name, name_val->value); + if(name_val->op != GOLD_OPERATOR_NONE) { + char *op = NULL; + switch (name_val->op) { + case GOLD_OPERATOR_G : + op = "G"; + break; + case GOLD_OPERATOR_GE : + op = "GE"; + break; + case GOLD_OPERATOR_L : + op = "L"; + break; + case GOLD_OPERATOR_LE : + op = "LE"; + break; + default: + error("Unknown operator '%d' " + "given to this condition %s = %s", + name_val->op, name_val->name, + name_val->value); + xfree(innerds); + list_iterator_destroy(itr); + return NULL; + } + xstrfmtcat(innerds, + "<Where name=\"%s\" op=\"%s\">%s</Where>", + name_val->name, op, name_val->value); + } else { + xstrfmtcat(innerds, "<Where name=\"%s\">%s</Where>", + name_val->name, name_val->value); + } } list_iterator_destroy(itr); @@ -422,11 +459,15 @@ /* I wish gold could do persistent connections but it only *
does one and then ends it so we have to do that also so - * every time we start a connection we have to finish it. + * every time we start a connection we have to finish it. And + * since we can only send one thing at a time we have to lock + * the connection. */ - if(_start_communication() == SLURM_ERROR) +// slurm_mutex_lock(&gold_mutex); + if(!(gold_fd = _start_communication())) { + //slurm_mutex_unlock(&gold_mutex); return NULL; - + } rc = _slurm_send_timeout(gold_fd, tmp_buff, strlen(tmp_buff), SLURM_PROTOCOL_NO_SEND_RECV_FLAGS, timeout); @@ -529,8 +570,9 @@ error: * does one and then ends it so we have to do that also so * every time we start a connection we have to finish it. */ - _end_communication(); - + _end_communication(gold_fd); +// slurm_mutex_unlock(&gold_mutex); + return gold_response; } diff --git a/src/plugins/jobacct/gold/gold_interface.h b/src/plugins/jobacct/gold/gold_interface.h index 5fedb9fdb73519c59c03a404c3e42d314dfde6db..5ee82ef9dd43341c077633a4d4642d06e33a6584 100644 --- a/src/plugins/jobacct/gold/gold_interface.h +++ b/src/plugins/jobacct/gold/gold_interface.h @@ -70,6 +70,7 @@ #define GOLD_OBJECT_MACHINE_STR "Machine" #define GOLD_OBJECT_JOB_STR "Job" #define GOLD_OBJECT_ROLEUSER_STR "RoleUser" +#define GOLD_OBJECT_EVENT_STR "EventLog" typedef enum { GOLD_ACTION_QUERY, @@ -84,12 +85,22 @@ typedef enum { GOLD_OBJECT_PROJECT, GOLD_OBJECT_MACHINE, GOLD_OBJECT_JOB, - GOLD_OBJECT_ROLEUSER + GOLD_OBJECT_ROLEUSER, + GOLD_OBJECT_EVENT } gold_object_t; +typedef enum { + GOLD_OPERATOR_NONE, + GOLD_OPERATOR_G, + GOLD_OPERATOR_GE, + GOLD_OPERATOR_L, + GOLD_OPERATOR_LE +} gold_operator_t; + typedef struct { char *name; char *value; + gold_operator_t op; } gold_name_value_t; typedef struct { @@ -125,7 +136,8 @@ extern int destroy_gold_request(gold_request_t *gold_request); extern int gold_request_add_assignment(gold_request_t *gold_request, char *name, char *value); extern int gold_request_add_condition(gold_request_t *gold_request, - char *name, char *value); + char *name, char *value, + gold_operator_t op); extern int gold_request_add_selection(gold_request_t *gold_request, char *name); extern gold_response_t *get_gold_response(gold_request_t *gold_request); diff --git a/src/plugins/jobacct/gold/jobacct_gold.c b/src/plugins/jobacct/gold/jobacct_gold.c index 0f66b0afa6a9ef9d16cfa14dd57de1b8421989e4..1a06564350897d74d688c0a4344c7a1f2c48ab75 100644 --- a/src/plugins/jobacct/gold/jobacct_gold.c +++ b/src/plugins/jobacct/gold/jobacct_gold.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * jobacct_gold.c - jobacct interface to gold. * - * $Id: jobacct_gold.c 12869 2007-12-20 17:22:25Z da $ + * $Id: jobacct_gold.c 13077 2008-01-23 22:31:44Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -39,11 +39,13 @@ #include <stdlib.h> #include <ctype.h> +#include <sys/stat.h> #include "src/common/xmalloc.h" #include "src/common/list.h" #include "src/common/xstring.h" #include "src/common/uid.h" +#include <src/common/parse_time.h> #include "src/slurmctld/slurmctld.h" #include "src/slurmd/slurmd/slurmd.h" @@ -51,6 +53,13 @@ #include "src/common/slurm_protocol_api.h" +typedef struct { + char *user; + char *project; + char *machine; + char *gold_id; +} gold_account_t; /* * These variables are required by the generic plugin interface.
If they * are not found in the plugin, the plugin loader will ignore it. @@ -89,6 +98,7 @@ const uint32_t plugin_version = 100; */ static char *cluster_name = NULL; +static List gold_account_list = NULL; /* _check_for_job * IN jobid - job id to check for @@ -96,6 +106,18 @@ static char *cluster_name = NULL; * RET 0 for not found 1 for found */ +static void _destroy_gold_account(void *object) +{ + gold_account_t *gold_account = (gold_account_t *) object; + if(gold_account) { + xfree(gold_account->user); + xfree(gold_account->project); + xfree(gold_account->machine); + xfree(gold_account->gold_id); + xfree(gold_account); + } +} + static int _check_for_job(uint32_t jobid, time_t submit) { gold_request_t *gold_request = create_gold_request(GOLD_OBJECT_JOB, @@ -110,10 +132,12 @@ static int _check_for_job(uint32_t jobid, time_t submit) gold_request_add_selection(gold_request, "JobId"); snprintf(tmp_buff, sizeof(tmp_buff), "%u", jobid); - gold_request_add_condition(gold_request, "JobId", tmp_buff); + gold_request_add_condition(gold_request, "JobId", tmp_buff, + GOLD_OPERATOR_NONE); snprintf(tmp_buff, sizeof(tmp_buff), "%u", (int)submit); - gold_request_add_condition(gold_request, "SubmitTime", tmp_buff); + gold_request_add_condition(gold_request, "SubmitTime", tmp_buff, + GOLD_OPERATOR_NONE); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -132,18 +156,38 @@ static int _check_for_job(uint32_t jobid, time_t submit) static char *_get_account_id(char *user, char *project, char *machine) { - gold_request_t *gold_request = create_gold_request(GOLD_OBJECT_ACCOUNT, - GOLD_ACTION_QUERY); + gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; char *gold_account_id = NULL; gold_response_entry_t *resp_entry = NULL; gold_name_value_t *name_val = NULL; + gold_account_t *gold_account = NULL; + ListIterator itr = list_iterator_create(gold_account_list); + + while((gold_account = list_next(itr))) { + if(user && strcmp(gold_account->user, user)) + continue; + if(project && strcmp(gold_account->project, project)) + continue; + gold_account_id = xstrdup(gold_account->gold_id); + break; + } + list_iterator_destroy(itr); + + if(gold_account_id) + return gold_account_id; + + gold_request = create_gold_request(GOLD_OBJECT_ACCOUNT, + GOLD_ACTION_QUERY); gold_request_add_selection(gold_request, "Id"); - gold_request_add_condition(gold_request, "User", user); + gold_request_add_condition(gold_request, "User", user, + GOLD_OPERATOR_NONE); if(project) - gold_request_add_condition(gold_request, "Project", project); - gold_request_add_condition(gold_request, "Machine", machine); + gold_request_add_condition(gold_request, "Project", project, + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Machine", machine, + GOLD_OPERATOR_NONE); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -161,6 +205,15 @@ static char *_get_account_id(char *user, char *project, char *machine) destroy_gold_name_value(name_val); destroy_gold_response_entry(resp_entry); + /* no need to keep track of machine since this is + * always going to be on the same machine. 
+ */ + gold_account = xmalloc(sizeof(gold_account_t)); + gold_account->user = xstrdup(user); + gold_account->gold_id = xstrdup(gold_account_id); + if(project) + gold_account->project = xstrdup(project); + list_push(gold_account_list, gold_account); } else { error("no account found returning 0"); gold_account_id = xstrdup("0"); @@ -181,7 +234,7 @@ static int _add_edit_job(struct job_record *job_ptr, gold_object_t action) char *gold_account_id = NULL; char *user = uid_to_string((uid_t)job_ptr->user_id); char *jname = NULL; - int ncpus=0, tmp = 0, i = 0; + int tmp = 0, i = 0; char *account = NULL; char *nodes = "(null)"; @@ -205,48 +258,34 @@ static int _add_edit_job(struct job_record *job_ptr, gold_object_t action) if (job_ptr->nodes && job_ptr->nodes[0]) nodes = job_ptr->nodes; - for (i=0; i < job_ptr->num_cpu_groups; i++) { - ncpus += (job_ptr->cpus_per_node[i]) - * (job_ptr->cpu_count_reps[i]); - //info("got %d from %d * %d", ncpus, job_ptr->cpus_per_node[i], - // job_ptr->cpu_count_reps[i]); - } + //info("total procs is %d", job_ptr->details->total_procs); if(action == GOLD_ACTION_CREATE) { snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_id); gold_request_add_assignment(gold_request, "JobId", tmp_buff); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (int)job_ptr->details->submit_time); + gold_request_add_assignment(gold_request, "SubmitTime", + tmp_buff); + gold_account_id = _get_account_id(user, account, cluster_name); - + gold_request_add_assignment(gold_request, "GoldAccountId", gold_account_id); xfree(gold_account_id); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->details->submit_time); - gold_request_add_assignment(gold_request, "SubmitTime", - tmp_buff); - + } else if (action == GOLD_ACTION_MODIFY) { snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_id); - gold_request_add_condition(gold_request, "JobId", tmp_buff); + gold_request_add_condition(gold_request, "JobId", tmp_buff, + GOLD_OPERATOR_NONE); snprintf(tmp_buff, sizeof(tmp_buff), "%u", (int)job_ptr->details->submit_time); gold_request_add_condition(gold_request, "SubmitTime", - tmp_buff); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->end_time); - gold_request_add_assignment(gold_request, "EndTime", - tmp_buff); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->exit_code); - gold_request_add_assignment(gold_request, "ExitCode", - tmp_buff); - + tmp_buff, + GOLD_OPERATOR_NONE); } else { destroy_gold_request(gold_request); error("_add_edit_job: bad action given %d", action); @@ -256,11 +295,13 @@ static int _add_edit_job(struct job_record *job_ptr, gold_object_t action) gold_request_add_assignment(gold_request, "Partition", job_ptr->partition); - snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->num_procs); - gold_request_add_assignment(gold_request, "RequestedCPUS", + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + job_ptr->details->total_procs); + gold_request_add_assignment(gold_request, "RequestedCPUCount", tmp_buff); - snprintf(tmp_buff, sizeof(tmp_buff), "%u", ncpus); - gold_request_add_assignment(gold_request, "AllocatedCPUS", + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + job_ptr->details->total_procs); + gold_request_add_assignment(gold_request, "AllocatedCPUCount", tmp_buff); gold_request_add_assignment(gold_request, "NodeList", @@ -269,8 +310,19 @@ static int _add_edit_job(struct job_record *job_ptr, gold_object_t action) gold_request_add_assignment(gold_request, "JobName", jname); xfree(jname); - -/* gold_request_add_assignment(gold_request, 
"CPUSecondsReserved", */ + + if(job_ptr->job_state != JOB_RUNNING) { + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (int)job_ptr->end_time); + gold_request_add_assignment(gold_request, "EndTime", + tmp_buff); + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (int)job_ptr->exit_code); + gold_request_add_assignment(gold_request, "ExitCode", + tmp_buff); + } +/* gold_request_add_assignment(gold_request, "ReservedCPUSeconds", */ /* ); */ @@ -287,9 +339,7 @@ static int _add_edit_job(struct job_record *job_ptr, gold_object_t action) snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_state & (~JOB_COMPLETING)); gold_request_add_assignment(gold_request, "State", - tmp_buff); - - + tmp_buff); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); @@ -393,8 +443,17 @@ int jobacct_p_init_slurmctld(char *gold_info) uint16_t port = 0; debug2("jobacct_init() called"); + if(cluster_name) { + info("already called init"); + return SLURM_SUCCESS; + } if(gold_info) total = gold_info; + + if(!gold_account_list) + gold_account_list = list_create(_destroy_gold_account); + + i = 0; while(total[j]) { if(total[j] == ':') { @@ -455,6 +514,8 @@ int jobacct_p_init_slurmctld(char *gold_info) int jobacct_p_fini_slurmctld() { xfree(cluster_name); + if(gold_account_list) + list_destroy(gold_account_list); fini_gold(); return SLURM_SUCCESS; } @@ -553,3 +614,286 @@ void jobacct_p_resume_poll() { return; } + +#define _DEBUG 0 + +extern int jobacct_p_node_down(struct node_record *node_ptr, time_t event_time, + char *reason) +{ + uint16_t cpus; + int rc = SLURM_ERROR; + gold_request_t *gold_request = NULL; + gold_response_t *gold_response = NULL; + char tmp_buff[50]; + + if (slurmctld_conf.fast_schedule) + cpus = node_ptr->config_ptr->cpus; + else + cpus = node_ptr->cpus; + +#if _DEBUG + slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); + info("Node_acct_down: %s at %s with %u cpus due to %s", + node_ptr->name, tmp_buff, cpus, node_ptr->reason); +#endif + /* If the node was already down end that record since the + * reason will most likely be different + */ + + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_MODIFY); + if(!gold_request) + return rc; + + gold_request_add_condition(gold_request, "Machine", cluster_name, + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "EndTime", "0", + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Name", node_ptr->name, + GOLD_OPERATOR_NONE); + + snprintf(tmp_buff, sizeof(tmp_buff), "%d", ((int)event_time - 1)); + gold_request_add_assignment(gold_request, "EndTime", tmp_buff); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if(!gold_response) { + error("jobacct_p_cluster_procs: no response received"); + return rc; + } + + if(gold_response->rc) { + error("gold_response has non-zero rc(%d): %s", + gold_response->rc, + gold_response->message); + destroy_gold_response(gold_response); + return rc; + } + destroy_gold_response(gold_response); + + /* now add the new one */ + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_CREATE); + if(!gold_request) + return rc; + + gold_request_add_assignment(gold_request, "Machine", cluster_name); + snprintf(tmp_buff, sizeof(tmp_buff), "%d", (int)event_time); + gold_request_add_assignment(gold_request, "StartTime", tmp_buff); + gold_request_add_assignment(gold_request, "Name", node_ptr->name); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", node_ptr->cpus); + gold_request_add_assignment(gold_request, 
"CPUCount", tmp_buff); + if(reason) + gold_request_add_assignment(gold_request, "Reason", reason); + else + gold_request_add_assignment(gold_request, "Reason", + node_ptr->reason); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if(!gold_response) { + error("jobacct_p_cluster_procs: no response received"); + return rc; + } + + if(!gold_response->rc) + rc = SLURM_SUCCESS; + else { + error("gold_response has non-zero rc(%d): %s", + gold_response->rc, + gold_response->message); + } + destroy_gold_response(gold_response); + + return rc; +} + +extern int jobacct_p_node_up(struct node_record *node_ptr, time_t event_time) +{ + int rc = SLURM_ERROR; + gold_request_t *gold_request = NULL; + gold_response_t *gold_response = NULL; + char tmp_buff[50]; + +#if _DEBUG + slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); + info("Node_acct_up: %s at %s", node_ptr->name, tmp_buff); +#endif + /* FIXME: WRITE TO DATABASE HERE */ + + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_MODIFY); + if(!gold_request) + return rc; + + gold_request_add_condition(gold_request, "Machine", cluster_name, + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "EndTime", "0", + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Name", node_ptr->name, + GOLD_OPERATOR_NONE); + + snprintf(tmp_buff, sizeof(tmp_buff), "%d", ((int)event_time - 1)); + gold_request_add_assignment(gold_request, "EndTime", tmp_buff); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if(!gold_response) { + error("jobacct_p_node_up: no response received"); + return rc; + } + + if(gold_response->rc) { + error("gold_response has non-zero rc(%d): %s", + gold_response->rc, + gold_response->message); + destroy_gold_response(gold_response); + return rc; + } + destroy_gold_response(gold_response); + + + return rc; +} + +extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) +{ + static uint32_t last_procs = -1; + gold_request_t *gold_request = NULL; + gold_response_t *gold_response = NULL; + char tmp_buff[50]; + int rc = SLURM_ERROR; + bool no_modify = 0; + if (procs == last_procs) { + debug3("we have the same procs as before no need to " + "query the database."); + return SLURM_SUCCESS; + } + last_procs = procs; + + /* Record the processor count */ +#if _DEBUG + slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); + info("Node_acct_procs: %s has %u total CPUs at %s", + cluster_name, procs, tmp_buff); +#endif + + /* get the last known one */ + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_QUERY); + if(!gold_request) + return rc; + gold_request_add_condition(gold_request, "Machine", cluster_name, + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "EndTime", "0", + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Name", "NULL", + GOLD_OPERATOR_NONE); + + gold_request_add_selection(gold_request, "CPUCount"); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if(!gold_response) { + error("jobacct_p_cluster_procs: no response received"); + return rc; + } + + if(gold_response->entry_cnt > 0) { + gold_response_entry_t *resp_entry = + list_pop(gold_response->entries); + gold_name_value_t *name_val = list_pop(resp_entry->name_val); + + if(procs == atoi(name_val->value)) { + debug("System hasn't changed since last entry"); + destroy_gold_name_value(name_val); + destroy_gold_response_entry(resp_entry); + 
destroy_gold_response(gold_response); + return SLURM_SUCCESS; + } else { + debug("System has changed from %s cpus to %u", + name_val->value, procs); + } + + destroy_gold_name_value(name_val); + destroy_gold_response_entry(resp_entry); + } else { + debug("We don't have an entry for this machine, " + "most likely a first time running."); + no_modify = 1; + } + + destroy_gold_response(gold_response); + + + if(!no_modify) { + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_MODIFY); + if(!gold_request) + return rc; + + gold_request_add_condition(gold_request, "Machine", + cluster_name, + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "EndTime", "0", + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Name", "NULL", + GOLD_OPERATOR_NONE); + + snprintf(tmp_buff, sizeof(tmp_buff), "%d", + ((int)event_time - 1)); + gold_request_add_assignment(gold_request, "EndTime", tmp_buff); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if(!gold_response) { + error("jobacct_p_cluster_procs: no response received"); + return rc; + } + + if(gold_response->rc) { + error("gold_response has non-zero rc(%d): %s", + gold_response->rc, + gold_response->message); + destroy_gold_response(gold_response); + return rc; + } + destroy_gold_response(gold_response); + } + + /* now add the new one */ + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_CREATE); + if(!gold_request) + return rc; + + gold_request_add_assignment(gold_request, "Machine", cluster_name); + snprintf(tmp_buff, sizeof(tmp_buff), "%d", (int)event_time); + gold_request_add_assignment(gold_request, "StartTime", tmp_buff); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", procs); + gold_request_add_assignment(gold_request, "CPUCount", tmp_buff); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if(!gold_response) { + error("jobacct_p_cluster_procs: no response received"); + return rc; + } + + if(!gold_response->rc) + rc = SLURM_SUCCESS; + else { + error("gold_response has non-zero rc(%d): %s", + gold_response->rc, + gold_response->message); + } + destroy_gold_response(gold_response); + + return rc; +} diff --git a/src/plugins/jobacct/linux/jobacct_linux.c b/src/plugins/jobacct/linux/jobacct_linux.c index 2c4476300c7e3e934914609ac61754bc8b643239..626ae5d6c7bfec3665308df693487e41a4f350f4 100644 --- a/src/plugins/jobacct/linux/jobacct_linux.c +++ b/src/plugins/jobacct/linux/jobacct_linux.c @@ -307,6 +307,20 @@ void jobacct_p_resume_poll() { common_resume_poll(); } +extern int jobacct_p_node_down(struct node_record *node_ptr, + time_t event_time, char *reason) +{ + return SLURM_SUCCESS; +} +extern int jobacct_p_node_up(struct node_record *node_ptr, time_t event_time) +{ + return SLURM_SUCCESS; +} +extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) +{ + return SLURM_SUCCESS; +} + /* * _get_offspring_data() -- collect memory usage data for the offspring * diff --git a/src/plugins/jobacct/none/jobacct_none.c b/src/plugins/jobacct/none/jobacct_none.c index 49178f6016ff25d234ad7075e182a0aaa99958c3..25d995622ccfde8c97cf72def9942009668fe533 100644 --- a/src/plugins/jobacct/none/jobacct_none.c +++ b/src/plugins/jobacct/none/jobacct_none.c @@ -240,3 +240,18 @@ void jobacct_p_resume_poll() { return; } + +extern int jobacct_p_node_down(struct node_record *node_ptr, + time_t event_time, char *reason) +{ + return SLURM_SUCCESS; +} +extern int jobacct_p_node_up(struct node_record *node_ptr, time_t event_time)
+{ + return SLURM_SUCCESS; +} +extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) +{ + return SLURM_SUCCESS; +} + diff --git a/src/plugins/sched/wiki/get_jobs.c b/src/plugins/sched/wiki/get_jobs.c index e021e05a847044ba2b4dc4e0705421af9ed85a68..70c9d91aa8a9107333ca3259bc82238bbe939edf 100644 --- a/src/plugins/sched/wiki/get_jobs.c +++ b/src/plugins/sched/wiki/get_jobs.c @@ -50,6 +50,7 @@ static char * _dump_job(struct job_record *job_ptr, int state_info); static char * _get_group_name(gid_t gid); static uint16_t _get_job_cpus_per_task(struct job_record *job_ptr); static uint32_t _get_job_end_time(struct job_record *job_ptr); +static char * _get_job_features(struct job_record *job_ptr); static uint32_t _get_job_min_disk(struct job_record *job_ptr); static uint32_t _get_job_min_mem(struct job_record *job_ptr); static uint32_t _get_job_min_nodes(struct job_record *job_ptr); @@ -58,6 +59,7 @@ static uint32_t _get_job_submit_time(struct job_record *job_ptr); static uint32_t _get_job_suspend_time(struct job_record *job_ptr); static uint32_t _get_job_tasks(struct job_record *job_ptr); static uint32_t _get_job_time_limit(struct job_record *job_ptr); +static int _hidden_job(struct job_record *job_ptr); static char * _task_list(struct job_record *job_ptr); @@ -165,6 +167,19 @@ extern int get_jobs(char *cmd_ptr, int *err_code, char **err_msg) return 0; } +static int _hidden_job(struct job_record *job_ptr) +{ + int i; + + for (i=0; i<HIDE_PART_CNT; i++) { + if (hide_part_ptr[i] == NULL) + break; + if (hide_part_ptr[i] == job_ptr->part_ptr) + return 1; + } + return 0; +} + static char * _dump_all_jobs(int *job_cnt, int state_info) { int cnt = 0; @@ -174,6 +189,8 @@ static char * _dump_all_jobs(int *job_cnt, int state_info) job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (_hidden_job(job_ptr)) + continue; tmp_buf = _dump_job(job_ptr, state_info); if (cnt > 0) xstrcat(buf, "#"); @@ -220,6 +237,16 @@ static char * _dump_job(struct job_record *job_ptr, int state_info) xfree(hosts); } + if (job_ptr->job_state == JOB_PENDING) { + char *req_features = _get_job_features(job_ptr); + if (req_features) { + snprintf(tmp, sizeof(tmp), + "RFEATURES=%s;", req_features); + xstrcat(buf, tmp); + xfree(req_features); + } + } + if (job_ptr->job_state == JOB_FAILED) { snprintf(tmp, sizeof(tmp), "REJMESSAGE=\"%s\";", @@ -408,6 +435,35 @@ static char * _get_job_state(struct job_record *job_ptr) return "Removed"; } +/* Return a job's required features, if any joined with AND. + * If required features are joined by OR, then return NULL. + * Returned string must be xfreed. 
*/ +static char * _get_job_features(struct job_record *job_ptr) +{ + int i; + char *rfeatures; + + if ((job_ptr->details == NULL) + || (job_ptr->details->features == NULL) + || (job_ptr->details->features[0] == '\0')) + return NULL; + + rfeatures = xstrdup(job_ptr->details->features); + /* Translate "&" to ":" */ + for (i=0; ; i++) { + if (rfeatures[i] == '\0') + return rfeatures; + if (rfeatures[i] == '|') + break; + if (rfeatures[i] == '&') + rfeatures[i] = ':'; + } + + /* Found '|' (OR), which is not supported by Moab */ + xfree(rfeatures); + return NULL; +} + static uint32_t _get_job_end_time(struct job_record *job_ptr) { if (IS_JOB_FINISHED(job_ptr)) diff --git a/src/plugins/sched/wiki/msg.c b/src/plugins/sched/wiki/msg.c index 20cedd7e3ee3f0ef00e95f35d3f5056d79be02e4..60df0858f47a5a82c5f544616b6904e1d90c5575 100644 --- a/src/plugins/sched/wiki/msg.c +++ b/src/plugins/sched/wiki/msg.c @@ -55,6 +55,7 @@ char e_host[E_HOST_SIZE] = ""; char e_host_bu[E_HOST_SIZE] = ""; uint16_t e_port = 0; struct part_record *exclude_part_ptr[EXC_PART_CNT]; +struct part_record *hide_part_ptr[HIDE_PART_CNT]; uint16_t job_aggregation_time = 10; /* Default value is 10 seconds */ int init_prio_mode = PRIO_HOLD; uint16_t kill_wait; @@ -230,11 +231,12 @@ extern int parse_wiki_config(void) {"EHostBackup", S_P_STRING}, {"EPort", S_P_UINT16}, {"ExcludePartitions", S_P_STRING}, + {"HidePartitionJobs", S_P_STRING}, {"JobAggregationTime", S_P_UINT16}, {"JobPriority", S_P_STRING}, {NULL} }; s_p_hashtbl_t *tbl; - char *exclude_partitions; + char *exclude_partitions, *hide_partitions; char *key = NULL, *priority_mode = NULL, *wiki_conf; struct stat buf; slurm_ctl_conf_t *conf; @@ -243,6 +245,8 @@ extern int parse_wiki_config(void) /* Set default values */ for (i=0; i<EXC_PART_CNT; i++) exclude_part_ptr[i] = NULL; + for (i=0; i<HIDE_PART_CNT; i++) + hide_part_ptr[i] = NULL; conf = slurm_conf_lock(); strncpy(e_host, conf->control_addr, sizeof(e_host)); if (conf->backup_addr) { @@ -301,6 +305,25 @@ extern int parse_wiki_config(void) } } + if (s_p_get_string(&hide_partitions, "HidePartitionJobs", tbl)) { + char *tok = NULL, *tok_p = NULL; + tok = strtok_r(hide_partitions, ",", &tok_p); + i = 0; + while (tok) { + if (i >= HIDE_PART_CNT) { + error("HidePartitionJobs has too many entries, " + "skipping %s and later entries", tok); + break; + } + hide_part_ptr[i] = find_part_record(tok); + if (hide_part_ptr[i]) + i++; + else + error("HidePartitionJobs %s not found", tok); + tok = strtok_r(NULL, ",", &tok_p); + } + } + if (s_p_get_string(&priority_mode, "JobPriority", tbl)) { if (strcasecmp(priority_mode, "hold") == 0) init_prio_mode = PRIO_HOLD; diff --git a/src/plugins/sched/wiki/msg.h b/src/plugins/sched/wiki/msg.h index e19e45b1f08e449f47100c7101465e89fc5f65a4..5015e26cb162d5db939105399b4715007d9b8b44 100644 --- a/src/plugins/sched/wiki/msg.h +++ b/src/plugins/sched/wiki/msg.h @@ -84,6 +84,7 @@ /* Global configuration parameters */ #define E_HOST_SIZE 256 #define EXC_PART_CNT 10 +#define HIDE_PART_CNT 10 #define KEY_SIZE 32 #define PRIO_HOLD 0 #define PRIO_DECREMENT 1 @@ -93,6 +94,7 @@ extern char e_host[E_HOST_SIZE]; extern char e_host_bu[E_HOST_SIZE]; extern uint16_t e_port; extern struct part_record *exclude_part_ptr[EXC_PART_CNT]; +extern struct part_record *hide_part_ptr[HIDE_PART_CNT]; extern uint16_t job_aggregation_time; extern uint16_t kill_wait; extern uint16_t use_host_exp; diff --git a/src/plugins/sched/wiki2/cancel_job.c b/src/plugins/sched/wiki2/cancel_job.c index
26584cccd1d6f0fac8fc6c2ced128bef72c4e611..294a67c128f18523d0c961bdf80d4308c4775194 100644 --- a/src/plugins/sched/wiki2/cancel_job.c +++ b/src/plugins/sched/wiki2/cancel_job.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * cancel_job.c - Process Wiki cancel job request ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. @@ -39,17 +39,21 @@ #include "src/slurmctld/locks.h" #include "src/slurmctld/slurmctld.h" -#define TYPE_ADMIN 0 -#define TYPE_TIMEOUT 1 +#define TYPE_ADMIN 0 +#define TYPE_TIMEOUT 1 -static int _cancel_job(uint32_t jobid, int *err_code, char **err_msg); -static int _timeout_job(uint32_t jobid, int *err_code, char **err_msg); +static int _cancel_job(uint32_t jobid, char *comment_ptr, + int *err_code, char **err_msg); +static int _timeout_job(uint32_t jobid, char *comment_ptr, + int *err_code, char **err_msg); -/* RET 0 on success, -1 on failure */ +/* Cancel a job: + * CMD=CANCELJOB ARG=<jobid> TYPE=<reason> [COMMENT=<whatever>] + * RET 0 on success, -1 on failure */ extern int cancel_job(char *cmd_ptr, int *err_code, char **err_msg) { - char *arg_ptr, *tmp_char; - int cancel_type = TYPE_ADMIN; + char *arg_ptr, *comment_ptr, *type_ptr, *tmp_char; + int cancel_type = TYPE_ADMIN, i; uint32_t jobid; static char reply_msg[128]; @@ -68,13 +72,54 @@ extern int cancel_job(char *cmd_ptr, int *err_code, char **err_msg) return -1; } - if (strstr(cmd_ptr, "TYPE=TIMEOUT") != 0) + comment_ptr = strstr(cmd_ptr, "COMMENT="); + type_ptr = strstr(cmd_ptr, "TYPE="); + + if (comment_ptr) { + comment_ptr[7] = ':'; + comment_ptr += 8; + if (comment_ptr[0] == '\"') { + comment_ptr++; + for (i=0; i<MAX_COMMENT_LEN; i++) { + if (comment_ptr[i] == '\0') + break; + if (comment_ptr[i] == '\"') { + comment_ptr[i] = '\0'; + break; + } + } + if (i == MAX_COMMENT_LEN) + comment_ptr[i-1] = '\0'; + } else if (comment_ptr[0] == '\'') { + comment_ptr++; + for (i=0; i<MAX_COMMENT_LEN; i++) { + if (comment_ptr[i] == '\0') + break; + if (comment_ptr[i] == '\'') { + comment_ptr[i] = '\0'; + break; + } + } + if (i == MAX_COMMENT_LEN) + comment_ptr[i-1] = '\0'; + } else + null_term(comment_ptr); + } + + if (type_ptr == NULL) { + *err_code = -300; + *err_msg = "No TYPE value"; + error("wiki: CANCELJOB has no TYPE specification"); + return -1; + } + type_ptr += 5; + if (strncmp(type_ptr, "TIMEOUT", 7) == 0) cancel_type = TYPE_TIMEOUT; - else if (strstr(cmd_ptr, "TYPE=WALLCLOCK") != 0) + else if (strncmp(type_ptr, "WALLCLOCK", 9) == 0) cancel_type = TYPE_TIMEOUT; - else if (strstr(cmd_ptr, "TYPE=ADMIN") != 0) + else if (strncmp(type_ptr, "ADMIN", 5) == 0) cancel_type = TYPE_ADMIN; - else if (strstr(cmd_ptr, "TYPE=") != 0) { + else { *err_code = -300; *err_msg = "Invalid TYPE value"; error("wiki: CANCELJOB has invalid TYPE"); @@ -82,10 +127,10 @@ extern int cancel_job(char *cmd_ptr, int *err_code, char **err_msg) } if (cancel_type == TYPE_ADMIN) { - if (_cancel_job(jobid, err_code, err_msg) != 0) + if (_cancel_job(jobid, comment_ptr, err_code, err_msg) != 0) return -1; } else { - if (_timeout_job(jobid, err_code, err_msg) != 0) + if (_timeout_job(jobid, comment_ptr, err_code, err_msg) != 0) return -1; } @@ -96,14 +141,35 @@ extern int cancel_job(char *cmd_ptr, int 
*err_code, char **err_msg) } /* Cancel a job now */ -static int _cancel_job(uint32_t jobid, int *err_code, char **err_msg) +static int _cancel_job(uint32_t jobid, char *comment_ptr, + int *err_code, char **err_msg) { int rc = 0, slurm_rc; /* Write lock on job info */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK }; + struct job_record *job_ptr; lock_slurmctld(job_write_lock); + job_ptr = find_job_record(jobid); + if (job_ptr == NULL) { + *err_code = -700; + *err_msg = "No such job"; + error("wiki: Failed to find job %u", jobid); + rc = -1; + goto fini; + } + + if (comment_ptr) { + char *reserved = strstr(comment_ptr, "RESERVED:"); + if (reserved && job_ptr->details) { + reserved += 9; + job_ptr->details->reserved_resources = + strtol(reserved, NULL, 10); + } + xfree(job_ptr->comment); + job_ptr->comment = xstrdup(comment_ptr); + } + slurm_rc = job_signal(jobid, SIGKILL, 0, 0); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; @@ -121,7 +187,8 @@ static int _cancel_job(uint32_t jobid, int *err_code, char **err_msg) } /* Set timeout for specific job, the job will be purged soon */ -static int _timeout_job(uint32_t jobid, int *err_code, char **err_msg) +static int _timeout_job(uint32_t jobid, char *comment_ptr, + int *err_code, char **err_msg) { int rc = 0; struct job_record *job_ptr; @@ -139,6 +206,17 @@ static int _timeout_job(uint32_t jobid, int *err_code, char **err_msg) goto fini; } + if (comment_ptr) { + char *reserved = strstr(comment_ptr, "RESERVED:"); + if (reserved && job_ptr->details) { + reserved += 9; + job_ptr->details->reserved_resources = + strtol(reserved, NULL, 10); + } + xfree(job_ptr->comment); + job_ptr->comment = xstrdup(comment_ptr); + } + job_ptr->end_time = time(NULL); debug("wiki: set end time for job %u", jobid); diff --git a/src/plugins/sched/wiki2/get_jobs.c b/src/plugins/sched/wiki2/get_jobs.c index ea2afa63f7a5f7f8f8227a59c49472fd14304355..d05a99036f6feee35bca33135352422e3bdbf9be 100644 --- a/src/plugins/sched/wiki2/get_jobs.c +++ b/src/plugins/sched/wiki2/get_jobs.c @@ -61,6 +61,7 @@ static uint32_t _get_job_submit_time(struct job_record *job_ptr); static uint32_t _get_job_suspend_time(struct job_record *job_ptr); static uint32_t _get_job_tasks(struct job_record *job_ptr); static uint32_t _get_job_time_limit(struct job_record *job_ptr); +static int _hidden_job(struct job_record *job_ptr); #define SLURM_INFO_ALL 0 #define SLURM_INFO_VOLITILE 1 @@ -182,6 +183,24 @@ extern int get_jobs(char *cmd_ptr, int *err_code, char **err_msg) return 0; } +static int _hidden_job(struct job_record *job_ptr) +{ + int i; + + if (job_ptr->job_id < slurmctld_conf.first_job_id) { + /* jobs submitted directly by Moab */ + return 0; + } + + for (i=0; i<HIDE_PART_CNT; i++) { + if (hide_part_ptr[i] == NULL) + break; + if (hide_part_ptr[i] == job_ptr->part_ptr) + return 1; + } + return 0; +} + static char * _dump_all_jobs(int *job_cnt, int state_info) { int cnt = 0; @@ -191,6 +210,8 @@ static char * _dump_all_jobs(int *job_cnt, int state_info) job_iterator = list_iterator_create(job_list); while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (_hidden_job(job_ptr)) + continue; tmp_buf = _dump_job(job_ptr, state_info); if (cnt > 0) xstrcat(buf, "#"); diff --git a/src/plugins/sched/wiki2/job_modify.c b/src/plugins/sched/wiki2/job_modify.c index 806106c3aa38041dda32bb628ba7051b92a3b451..f726304fab48edce864bd2ff09e5ac713f665bb4 100644 --- a/src/plugins/sched/wiki2/job_modify.c +++ b/src/plugins/sched/wiki2/job_modify.c @@ -40,7 +40,8 @@ #include
"src/slurmctld/locks.h" #include "src/slurmctld/slurmctld.h" -static void _null_term(char *str) +/* Given a string, replace the first space found with '\0' */ +extern void null_term(char *str) { char *tmp_ptr; for (tmp_ptr=str; ; tmp_ptr++) { @@ -72,7 +73,8 @@ static int32_t _get_depend_id(char *str) static int _job_modify(uint32_t jobid, char *bank_ptr, int32_t depend_id, char *new_hostlist, uint32_t new_node_cnt, char *part_name_ptr, - uint32_t new_time_limit, char *name_ptr) + uint32_t new_time_limit, char *name_ptr, + char *start_ptr) { struct job_record *job_ptr; time_t now = time(NULL); @@ -104,6 +106,7 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, old_time) * 60); last_job_update = now; } + if (bank_ptr) { info("wiki: change job %u bank %s", jobid, bank_ptr); xfree(job_ptr->account); @@ -111,6 +114,21 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, last_job_update = now; } + if (start_ptr) { + char *end_ptr; + uint32_t begin_time = strtol(start_ptr, &end_ptr, 10); + if ((job_ptr->job_state == JOB_PENDING) && + (job_ptr->details)) { + info("wiki: change job %u begin time to %u", + jobid, begin_time); + job_ptr->details->begin_time = begin_time; + } else { + error("wiki: MODIFYJOB begin_time of non-pending " + "job %u", jobid); + return ESLURM_DISABLED; + } + } + if (name_ptr) { info("wiki: change job %u name %s", jobid, name_ptr); strncpy(job_ptr->name, name_ptr, sizeof(job_ptr->name)); @@ -163,11 +181,13 @@ static int _job_modify(uint32_t jobid, char *bank_ptr, } host_fini: if (rc) { - info("wiki: change job %u invalid hostlist %s", jobid, new_hostlist); + info("wiki: change job %u invalid hostlist %s", + jobid, new_hostlist); xfree(job_ptr->details->req_nodes); return EINVAL; } else { - info("wiki: change job %u hostlist %s", jobid, new_hostlist); + info("wiki: change job %u hostlist %s", + jobid, new_hostlist); } } @@ -208,10 +228,11 @@ host_fini: if (rc) { /* Modify a job: * CMD=MODIFYJOB ARG=<jobid> PARTITION=<name> NODES=<number> * DEPEND=afterany:<jobid> TIMELIMT=<seconds> BANK=<name> + * MINSTARTTIME=<uts> * RET 0 on success, -1 on failure */ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) { - char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr; + char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr, *start_ptr; char *host_ptr, *name_ptr, *part_ptr, *time_ptr, *tmp_char; int i, slurm_rc; int depend_id = -1; @@ -243,13 +264,14 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) depend_ptr = strstr(cmd_ptr, "DEPEND="); name_ptr = strstr(cmd_ptr, "JOBNAME="); host_ptr = strstr(cmd_ptr, "HOSTLIST="); + start_ptr = strstr(cmd_ptr, "MINSTARTTIME="); nodes_ptr = strstr(cmd_ptr, "NODES="); part_ptr = strstr(cmd_ptr, "PARTITION="); time_ptr = strstr(cmd_ptr, "TIMELIMIT="); if (bank_ptr) { bank_ptr[4] = ':'; bank_ptr += 5; - _null_term(bank_ptr); + null_term(bank_ptr); } if (depend_ptr) { depend_ptr[6] = ':'; @@ -266,7 +288,7 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) if (host_ptr) { host_ptr[8] = ':'; host_ptr += 9; - _null_term(host_ptr); + null_term(host_ptr); } if (name_ptr) { name_ptr[7] = ':'; @@ -296,8 +318,13 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) if (i == MAX_JOBNAME_LEN) name_ptr[i-1] = '\0'; } else - _null_term(name_ptr); + null_term(name_ptr); } + if (start_ptr) { + start_ptr[12] = ':'; + start_ptr += 13; + null_term(start_ptr); + } if (nodes_ptr) { nodes_ptr[5] = ':'; nodes_ptr += 6; @@ -306,7 +333,7 @@ extern int job_modify_wiki(char 
*cmd_ptr, int *err_code, char **err_msg) if (part_ptr) { part_ptr[9] = ':'; part_ptr += 10; - _null_term(part_ptr); + null_term(part_ptr); } if (time_ptr) { time_ptr[9] = ':'; @@ -325,7 +352,8 @@ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) lock_slurmctld(job_write_lock); slurm_rc = _job_modify(jobid, bank_ptr, depend_id, host_ptr, - new_node_cnt, part_ptr, new_time_limit, name_ptr); + new_node_cnt, part_ptr, new_time_limit, name_ptr, + start_ptr); unlock_slurmctld(job_write_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; diff --git a/src/plugins/sched/wiki2/msg.c b/src/plugins/sched/wiki2/msg.c index 8161e15235575199cadd88c1abce6a1a30f81bb0..ab710c9768ab9eadcaeebce707502578783a49dd 100644 --- a/src/plugins/sched/wiki2/msg.c +++ b/src/plugins/sched/wiki2/msg.c @@ -56,6 +56,7 @@ char e_host[E_HOST_SIZE] = ""; char e_host_bu[E_HOST_SIZE] = ""; uint16_t e_port = 0; struct part_record *exclude_part_ptr[EXC_PART_CNT]; +struct part_record *hide_part_ptr[HIDE_PART_CNT]; uint32_t first_job_id; uint16_t job_aggregation_time = 10; /* Default value is 10 seconds */ int init_prio_mode = PRIO_HOLD; @@ -232,12 +233,13 @@ extern int parse_wiki_config(void) {"EHostBackup", S_P_STRING}, {"EPort", S_P_UINT16}, {"ExcludePartitions", S_P_STRING}, + {"HidePartitionJobs", S_P_STRING}, {"HostFormat", S_P_UINT16}, {"JobAggregationTime", S_P_UINT16}, {"JobPriority", S_P_STRING}, {NULL} }; s_p_hashtbl_t *tbl; - char *exclude_partitions; + char *exclude_partitions, *hide_partitions; char *key = NULL, *priority_mode = NULL, *wiki_conf; struct stat buf; slurm_ctl_conf_t *conf; @@ -246,6 +248,8 @@ extern int parse_wiki_config(void) /* Set default values */ for (i=0; i<EXC_PART_CNT; i++) exclude_part_ptr[i] = NULL; + for (i=0; i<HIDE_PART_CNT; i++) + hide_part_ptr[i] = NULL; conf = slurm_conf_lock(); strncpy(e_host, conf->control_addr, sizeof(e_host)); if (conf->backup_addr) { @@ -305,6 +309,25 @@ extern int parse_wiki_config(void) } } + if (s_p_get_string(&hide_partitions, "HidePartitionJobs", tbl)) { + char *tok = NULL, *tok_p = NULL; + tok = strtok_r(hide_partitions, ",", &tok_p); + i = 0; + while (tok) { + if (i >= HIDE_PART_CNT) { + error("HidePartitionJobs has too many entries, " + "skipping %s and later entries", tok); + break; + } + hide_part_ptr[i] = find_part_record(tok); + if (hide_part_ptr[i]) + i++; + else + error("HidePartitionJobs %s not found", tok); + tok = strtok_r(NULL, ",", &tok_p); + } + } + if (s_p_get_string(&priority_mode, "JobPriority", tbl)) { if (strcasecmp(priority_mode, "hold") == 0) init_prio_mode = PRIO_HOLD; diff --git a/src/plugins/sched/wiki2/msg.h b/src/plugins/sched/wiki2/msg.h index bba1686fdcb42d20f3f3b6326c7197a07aa7a9e1..53cbb8c9ff18bebae1f1add54b6dfff693f2bd26 100644 --- a/src/plugins/sched/wiki2/msg.h +++ b/src/plugins/sched/wiki2/msg.h @@ -82,17 +82,20 @@ #include "src/slurmctld/slurmctld.h" /* Global configuration parameters */ -#define E_HOST_SIZE 256 -#define EXC_PART_CNT 10 -#define KEY_SIZE 32 -#define PRIO_HOLD 0 -#define PRIO_DECREMENT 1 +#define E_HOST_SIZE 256 +#define EXC_PART_CNT 10 +#define HIDE_PART_CNT 10 +#define KEY_SIZE 32 +#define PRIO_HOLD 0 +#define PRIO_DECREMENT 1 +#define MAX_COMMENT_LEN 512 extern int init_prio_mode; extern char auth_key[KEY_SIZE]; extern char e_host[E_HOST_SIZE]; extern char e_host_bu[E_HOST_SIZE]; extern uint16_t e_port; extern struct part_record *exclude_part_ptr[EXC_PART_CNT]; +extern struct part_record *hide_part_ptr[HIDE_PART_CNT]; extern uint32_t first_job_id; extern uint16_t job_aggregation_time;
extern uint16_t kill_wait; @@ -103,6 +106,11 @@ extern int spawn_msg_thread(void); extern void term_msg_thread(void); extern char * bitmap2wiki_node_name(bitstr_t *bitmap); +/* + * Given a string, replace the first space found with '\0' + */ +extern void null_term(char *str); + /* Functions called from within msg.c (rather than creating a bunch * more header files with one function definition each */ extern int cancel_job(char *cmd_ptr, int *err_code, char **err_msg); diff --git a/src/plugins/sched/wiki2/start_job.c b/src/plugins/sched/wiki2/start_job.c index 76cf4b31fe58ff46597740f86ebc071090b2b13f..d6fd7dd8201d8e1f633723ccceb90d5c1e63f800 100644 --- a/src/plugins/sched/wiki2/start_job.c +++ b/src/plugins/sched/wiki2/start_job.c @@ -44,12 +44,15 @@ #include "src/slurmctld/state_save.h" static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, - char *tasklist, int *err_code, char **err_msg); + char *tasklist, char *comment_ptr, + int *err_code, char **err_msg); -/* RET 0 on success, -1 on failure */ +/* Start a job: + * CMD=STARTJOB ARG=<jobid> TASKLIST=<node_list> [COMMENT=<whatever>] + * RET 0 on success, -1 on failure */ extern int start_job(char *cmd_ptr, int *err_code, char **err_msg) { - char *arg_ptr, *task_ptr, *tasklist, *tmp_char; + char *arg_ptr, *comment_ptr, *task_ptr, *tasklist, *tmp_char; int i, rc, task_cnt; uint32_t jobid; hostlist_t hl = (hostlist_t) NULL; @@ -71,7 +74,40 @@ extern int start_job(char *cmd_ptr, int *err_code, char **err_msg) return -1; } - task_ptr = strstr(cmd_ptr, "TASKLIST="); + comment_ptr = strstr(cmd_ptr, "COMMENT="); + task_ptr = strstr(cmd_ptr, "TASKLIST="); + + if (comment_ptr) { + comment_ptr[7] = ':'; + comment_ptr += 8; + if (comment_ptr[0] == '\"') { + comment_ptr++; + for (i=0; i<MAX_COMMENT_LEN; i++) { + if (comment_ptr[i] == '\0') + break; + if (comment_ptr[i] == '\"') { + comment_ptr[i] = '\0'; + break; + } + } + if (i == MAX_COMMENT_LEN) + comment_ptr[i-1] = '\0'; + } else if (comment_ptr[0] == '\'') { + comment_ptr++; + for (i=0; i<MAX_COMMENT_LEN; i++) { + if (comment_ptr[i] == '\0') + break; + if (comment_ptr[i] == '\'') { + comment_ptr[i] = '\0'; + break; + } + } + if (i == MAX_COMMENT_LEN) + comment_ptr[i-1] = '\0'; + } else + null_term(comment_ptr); + } + if (task_ptr == NULL) { *err_code = -300; *err_msg = "STARTJOB lacks TASKLIST"; @@ -103,7 +139,7 @@ extern int start_job(char *cmd_ptr, int *err_code, char **err_msg) return -1; } - rc = _start_job(jobid, task_cnt, host_string, tasklist, + rc = _start_job(jobid, task_cnt, host_string, tasklist, comment_ptr, err_code, err_msg); xfree(tasklist); if (rc == 0) { @@ -121,11 +157,13 @@ extern int start_job(char *cmd_ptr, int *err_code, char **err_msg) * hostlist (IN) - SLURM hostlist expression with no repeated hostnames * tasklist (IN/OUT) - comma separated list of hosts with tasks to be started, * list hostname once per task to start + * comment_ptr (IN) - new comment field for the job or NULL for no change * err_code (OUT) - Moab error code * err_msg (OUT) - Moab error message */ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, - char *tasklist, int *err_code, char **err_msg) + char *tasklist, char *comment_ptr, + int *err_code, char **err_msg) { int rc = 0, old_task_cnt = 1; struct job_record *job_ptr; @@ -167,6 +205,17 @@ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, goto fini; } + if (comment_ptr) { + char *reserved = strstr(comment_ptr, "RESERVED:"); + if (reserved) { + reserved += 9; + job_ptr->details->reserved_resources = + 
strtol(reserved, NULL, 10); + } + xfree(job_ptr->comment); + job_ptr->comment = xstrdup(comment_ptr); + } + new_node_list = xstrdup(hostlist); if (hostlist && (new_node_list == NULL)) { *err_code = -700; diff --git a/src/sbcast/agent.c b/src/sbcast/agent.c index 2b102c89802864d5d30abdc2b5ce060237b8570a..a5ed1c0bc43ffc9d7d75932e1a70fdbedb0a8ed2 100644 --- a/src/sbcast/agent.c +++ b/src/sbcast/agent.c @@ -146,7 +146,7 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, hl = hostlist_create(alloc_resp->node_list); i = 0; - while(i < alloc_resp->node_cnt) { + while (i < alloc_resp->node_cnt) { int j = 0; name = hostlist_shift(hl); if(!name) { @@ -170,8 +170,6 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, slurm_msg_t_init(&thread_info[threads_used].msg); thread_info[threads_used].msg.msg_type = REQUEST_FILE_BCAST; - thread_info[threads_used].msg.data = bcast_msg; - threads_used++; } xfree(span); @@ -187,6 +185,7 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, error("pthread_attr_setdetachstate error %m"); for (i=0; i<threads_used; i++) { + thread_info[i].msg.data = bcast_msg; slurm_mutex_lock(&agent_cnt_mutex); agent_cnt++; slurm_mutex_unlock(&agent_cnt_mutex); diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index a9f491800d7bf66433cb5e66c97e0c36de4c1e13..6e337ab76adf5c47843f07b31271f308015afebf 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -277,6 +277,12 @@ scontrol_update_job (int argc, char *argv[]) (char **) NULL, 10); update_cnt++; } + else if (strncasecmp(argv[i], "TasksPerNode=", 13) == 0) { + job_msg.ntasks_per_node = + (uint16_t) strtol(&argv[i][13], + (char **) NULL, 10); + update_cnt++; + } else if (strncasecmp(argv[i], "ReqThreads=", 11) == 0) { job_msg.min_threads = (uint16_t) strtol(&argv[i][11], diff --git a/src/slurmctld/Makefile.am b/src/slurmctld/Makefile.am index 5852f1e58ea7af94af56011527746e519f28f766..ce5b031cc7f2bc1065c5491ea85add6af3026b14 100644 --- a/src/slurmctld/Makefile.am +++ b/src/slurmctld/Makefile.am @@ -23,7 +23,8 @@ slurmctld_SOURCES = \ locks.c \ locks.h \ node_mgr.c \ - node_scheduler.c node_scheduler.h \ + node_scheduler.c \ + node_scheduler.h \ partition_mgr.c \ ping_nodes.c \ ping_nodes.h \ diff --git a/src/slurmctld/Makefile.in b/src/slurmctld/Makefile.in index 071a2d2eb64a1e80c013cd7dd34640b91b0fa7e8..c438bcabe12e7a427d91fede39bd76202a631b9a 100644 --- a/src/slurmctld/Makefile.in +++ b/src/slurmctld/Makefile.in @@ -272,7 +272,8 @@ slurmctld_SOURCES = \ locks.c \ locks.h \ node_mgr.c \ - node_scheduler.c node_scheduler.h \ + node_scheduler.c \ + node_scheduler.h \ partition_mgr.c \ ping_nodes.c \ ping_nodes.h \ diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 36a9552ba7397ea6d1275322bcc13d00b2f98b47..99769cf93c59635273668751667d40109ea30989 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * controller.c - main control machine daemon for slurm - * $Id: controller.c 12861 2007-12-19 22:04:25Z jette $ + * $Id: controller.c 13077 2008-01-23 22:31:44Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
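Both the STARTJOB and CANCELJOB handlers above strip an optional quoted COMMENT= value and, when the comment carries a "RESERVED:<cpu-minutes>" token, store that value in job_ptr->details->reserved_resources. A minimal standalone sketch of that convention follows; parse_reserved() and the sample comment string are illustrative, not part of the plugin code.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Extract the CPU-minutes value following "RESERVED:" in a Wiki
 * comment string; return 0 when no such token is present. */
static long parse_reserved(const char *comment)
{
	const char *reserved = strstr(comment, "RESERVED:");
	if (reserved == NULL)
		return 0;
	return strtol(reserved + 9, NULL, 10);	/* 9 == strlen("RESERVED:") */
}

int main(void)
{
	const char *comment = "RESERVED:144";	/* hypothetical Moab comment */
	printf("reserved_resources = %ld\n", parse_reserved(comment));
	return 0;
}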
@@ -100,6 +100,12 @@ * check-in before we ping them */ #define SHUTDOWN_WAIT 2 /* Time to wait for backup server shutdown */ +#if (0) +/* If defined and FastSchedule=0 in slurm.conf, then report the CPU count that a + * node registers with rather than the CPU count defined for the node in slurm.conf */ +#define SLURM_NODE_ACCT_REGISTER 1 +#endif + /**************************************************************************\ * To test for memory leaks, set MEMORY_LEAK_DEBUG to 1 using * "configure --enable-memory-leak-debug" then execute @@ -152,6 +158,8 @@ static int controller_sigarray[] = { static void _default_sigaction(int sig); inline static void _free_server_thread(void); +static int _gold_cluster_ready(void); +static int _gold_mark_all_nodes_down(char *reason, time_t event_time); static void _init_config(void); static void _init_pidfile(void); static void _kill_old_slurmctld(void); @@ -301,13 +309,46 @@ int main(int argc, char *argv[]) (void) _shutdown_backup_controller(SHUTDOWN_WAIT); /* Now recover the remaining state information */ if (switch_restore(slurmctld_conf.state_save_location, - recover ? true : false)) + recover ? true : false)) fatal(" failed to initialize switch plugin" ); if ((error_code = read_slurm_conf(recover))) { fatal("read_slurm_conf reading %s: %s", slurmctld_conf.slurm_conf, slurm_strerror(error_code)); } + + if (recover == 0) + _gold_mark_all_nodes_down("cold-start", + time(NULL)); + else if (!stat("/tmp/slurm_gold_first", &stat_buf)) { + /* this is here for when slurm is + * started with gold for the first + * time to log any downed nodes. + */ + struct node_record *node_ptr = + node_record_table_ptr; + int i=0; + time_t event_time = time(NULL); + debug("found /tmp/slurm_gold_first, " + "setting nodes down"); + for (i = 0; + i < node_record_count; + i++, node_ptr++) { + if (node_ptr->name[0] == '\0' + || !node_ptr->reason) + continue; + + if(jobacct_g_node_down( + node_ptr, + event_time, + node_ptr->reason) + == SLURM_ERROR) + break; + } + if(unlink("/tmp/slurm_gold_first") < 0) + error("Error deleting " + "/tmp/slurm_gold_first"); + } } else { error("this host (%s) not valid controller (%s or %s)", node_name, slurmctld_conf.control_machine, @@ -315,6 +356,7 @@ int main(int argc, char *argv[]) exit(0); } info("Running as primary controller"); + _gold_cluster_ready(); if (slurm_sched_init() != SLURM_SUCCESS) fatal("failed to initialize scheduling plugin"); @@ -762,6 +804,61 @@ static void _free_server_thread(void) pthread_cond_broadcast(&server_thread_cond); } +static int _gold_cluster_ready(void) +{ + uint32_t procs = 0; + struct node_record *node_ptr; + int i; + int rc = SLURM_ERROR; + time_t event_time = time(NULL); + + node_ptr = node_record_table_ptr; + for (i = 0; i < node_record_count; i++, node_ptr++) { + if (node_ptr->name[0] == '\0') + continue; +#ifdef SLURM_NODE_ACCT_REGISTER + if (slurmctld_conf.fast_schedule) + procs += node_ptr->config_ptr->cpus; + else + procs += node_ptr->cpus; +#else + procs += node_ptr->config_ptr->cpus; +#endif + } + + rc = jobacct_g_cluster_procs(procs, event_time); + + return rc; +} + +static int _gold_mark_all_nodes_down(char *reason, time_t event_time) +{ + char *state_file; + struct stat stat_buf; + struct node_record *node_ptr; + int i; + int rc = SLURM_ERROR; + + state_file = xstrdup (slurmctld_conf.state_save_location); + xstrcat (state_file, "/node_state"); + if (stat(state_file, &stat_buf)) { + error("_gold_mark_all_nodes_down: could not stat(%s) to record " + "node down time", state_file); + xfree(state_file); + return rc; +
} + xfree(state_file); + + node_ptr = node_record_table_ptr; + for (i = 0; i < node_record_count; i++, node_ptr++) { + if (node_ptr->name[0] == '\0') + continue; + if((rc = jobacct_g_node_down(node_ptr, event_time, reason)) + == SLURM_ERROR) + break; + } + return rc; +} /* * _slurmctld_background - process slurmctld background activities * purge defunct job records, save state, schedule jobs, and @@ -778,6 +875,7 @@ static void *_slurmctld_background(void *no_data) static time_t last_timelimit_time; static time_t last_assert_primary_time; static time_t last_trigger; + static time_t last_node_acct; time_t now; int ping_interval; DEF_TIMERS; @@ -795,6 +893,9 @@ static void *_slurmctld_background(void *no_data) * (Might kill jobs on nodes set DOWN) */ slurmctld_lock_t node_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK }; + /* Locks: Read node */ + slurmctld_lock_t node_read_lock = { + NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; /* Locks: Write partition */ slurmctld_lock_t part_write_lock = { NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK }; @@ -813,6 +914,7 @@ static void *_slurmctld_background(void *no_data) ping_interval = 60 * 60 * 24 * 365; /* one year */ last_ping_node_time = now + (time_t)MIN_CHECKIN_TIME - ping_interval; last_ping_srun_time = now; + last_node_acct = now; (void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); (void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); debug3("_slurmctld_background pid = %u", getpid()); @@ -912,6 +1014,14 @@ static void *_slurmctld_background(void *no_data) save_all_state(); } + if (difftime(now, last_node_acct) >= PERIODIC_NODE_ACCT) { + /* Report current node state to account for added + * or reconfigured nodes */ + last_node_acct = now; + lock_slurmctld(node_read_lock); + _gold_cluster_ready(); + unlock_slurmctld(node_read_lock); + } /* Reassert this machine as the primary controller. * A network or security problem could result in * the backup controller assuming control even diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index ac64ee1e7f5e5932a5c9ade926890436af66f285..5109f4cf6df2bba5ee8859cf7a70bc956f5d30c7 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3,7 +3,7 @@ * Note: there is a global job list (job_list), time stamp * (last_job_update), and hash table (job_hash) * - * $Id: job_mgr.c 12861 2007-12-19 22:04:25Z jette $ + * $Id: job_mgr.c 13083 2008-01-24 16:28:23Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -1073,13 +1073,13 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test) job_ptr->end_time = job_ptr->suspend_time; else job_ptr->end_time = now; - deallocate_nodes(job_ptr, false, suspended); - + /* We want this job to look like it was cancelled in the * accounting logs. Set a new submit time so the restarted * job looks like a new job.
*/ save_state = job_ptr->job_state; job_ptr->job_state = JOB_CANCELLED; + deallocate_nodes(job_ptr, false, suspended); job_completion_logger(job_ptr); job_ptr->job_state = save_state; job_ptr->details->submit_time = now; @@ -1096,8 +1096,8 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test) job_ptr->suspend_time; else job_ptr->end_time = time(NULL); - job_completion_logger(job_ptr); deallocate_nodes(job_ptr, false, suspended); + job_completion_logger(job_ptr); } } @@ -1418,11 +1418,7 @@ extern int job_allocate(job_desc_msg_t * job_specs, int immediate, last_job_update = now; slurm_sched_schedule(); /* work for external scheduler */ } - if (independent && - (job_ptr->details && (job_ptr->details->begin_time == 0)) && - ((error_code == SLURM_SUCCESS) || (error_code == ESLURM_NODES_BUSY))) - job_ptr->details->begin_time = now; - + if ((error_code == ESLURM_NODES_BUSY) || (error_code == ESLURM_JOB_HELD) || (error_code == ESLURM_REQUESTED_PART_CONFIG_UNAVAILABLE)) { @@ -1999,7 +1995,7 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, job_ptr->priority = 1; /* Move to end of queue */ job_ptr->state_reason = fail_reason; } - jobacct_g_job_start_slurmctld(job_ptr); + cleanup: FREE_NULL_BITMAP(req_bitmap); @@ -3776,6 +3772,21 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } + if (job_specs->ntasks_per_node != (uint16_t) NO_VAL) { + if ((!IS_JOB_PENDING(job_ptr)) || (detail_ptr == NULL)) + error_code = ESLURM_DISABLED; + else if (super_user) { + detail_ptr->ntasks_per_node = job_specs->ntasks_per_node; + info("update_job: setting ntasks_per_node to %u for " + "job_id %u", job_specs->ntasks_per_node, + job_specs->job_id); + } else { + error("Not super user: setting ntasks_per_node for job %u", + job_specs->job_id); + error_code = ESLURM_ACCESS_DENIED; + } + } + if (job_specs->dependency != NO_VAL) { if (!IS_JOB_PENDING(job_ptr)) error_code = ESLURM_DISABLED; @@ -4392,7 +4403,6 @@ extern void job_completion_logger(struct job_record *job_ptr) mail_job_info(job_ptr, MAIL_JOB_FAIL); } - jobacct_g_job_complete_slurmctld(job_ptr); g_slurm_jobcomp_write(job_ptr); srun_job_complete(job_ptr); } @@ -4407,25 +4417,31 @@ extern bool job_independent(struct job_record *job_ptr) { struct job_record *dep_ptr; struct job_details *detail_ptr = job_ptr->details; + time_t now = time(NULL); - if (detail_ptr && (detail_ptr->begin_time > time(NULL))) { + if (detail_ptr && (detail_ptr->begin_time > now)) { job_ptr->state_reason = WAIT_TIME; return false; /* not yet time */ } - + if (job_ptr->dependency == 0) - return true; + goto indi; dep_ptr = find_job_record(job_ptr->dependency); if (dep_ptr == NULL) - return true; + goto indi; if (((dep_ptr->job_state & JOB_COMPLETING) == 0) && (dep_ptr->job_state >= JOB_COMPLETE)) - return true; + goto indi; job_ptr->state_reason = WAIT_DEPENDENCY; return false; /* job exists and incomplete */ + + indi: /* job is independent, set begin time as needed */ + if (detail_ptr && (detail_ptr->begin_time == 0)) + detail_ptr->begin_time = now; + return true; } /* * determine if job is ready to execute per the node select plugin @@ -4807,14 +4823,14 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd) job_ptr->end_time = job_ptr->suspend_time; else job_ptr->end_time = now; - deallocate_nodes(job_ptr, false, suspended); - xfree(job_ptr->details->req_node_layout); /* We want this job to look like it was cancelled in the * accounting logs. Set a new submit time so the restarted * job looks like a new job.
*/ save_state = job_ptr->job_state; job_ptr->job_state = JOB_CANCELLED; + deallocate_nodes(job_ptr, false, suspended); + xfree(job_ptr->details->req_node_layout); job_completion_logger(job_ptr); job_ptr->job_state = save_state; job_ptr->details->submit_time = now; diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 5d83687d7c84560e1e16c0e2d938fead0a96f0be..7478bc6062aeed2917abe713367a82316bbad159 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -4,7 +4,7 @@ * hash table (node_hash_table), time stamp (last_node_update) and * configuration list (config_list) * - * $Id: node_mgr.c 12407 2007-09-25 22:51:43Z jette $ + * $Id: node_mgr.c 13068 2008-01-23 18:41:54Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -62,6 +62,7 @@ #include "src/common/xstring.h" #include "src/common/node_select.h" #include "src/common/read_config.h" +#include "src/common/slurm_jobacct.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/locks.h" #include "src/slurmctld/ping_nodes.h" @@ -96,7 +97,8 @@ static struct node_record * _find_alias_node_record (char *name); static int _hash_index (char *name); static void _list_delete_config (void *config_entry); static int _list_find_config (void *config_entry, void *key); -static void _make_node_down(struct node_record *node_ptr); +static void _make_node_down(struct node_record *node_ptr, + time_t event_time); static void _node_did_resp(struct node_record *node_ptr); static bool _node_is_hidden(struct node_record *node_ptr); static void _node_not_resp (struct node_record *node_ptr, time_t msg_time); @@ -984,11 +986,11 @@ void set_slurmd_addr (void) */ int update_node ( update_node_msg_t * update_node_msg ) { - int error_code = 0, base_state = 0, node_inx; + int error_code = 0, node_inx; struct node_record *node_ptr = NULL; char *this_node_name = NULL; hostlist_t host_list; - uint16_t node_flags = 0, state_val; + uint16_t base_state = 0, node_flags = 0, state_val; time_t now = time(NULL); if (update_node_msg -> node_names == NULL ) { @@ -1018,6 +1020,14 @@ int update_node ( update_node_msg_t * update_node_msg ) break; } + if ((update_node_msg -> reason) && + (update_node_msg -> reason[0])) { + xfree(node_ptr->reason); + node_ptr->reason = xstrdup(update_node_msg->reason); + info ("update_node: node %s reason set to: %s", + this_node_name, node_ptr->reason); + } + if (state_val != (uint16_t) NO_VAL) { base_state = node_ptr->node_state; if (!_valid_node_state_change(base_state, state_val)) { @@ -1032,8 +1042,12 @@ int update_node ( update_node_msg_t * update_node_msg ) } if (state_val != (uint16_t) NO_VAL) { if (state_val == NODE_RESUME) { - node_ptr->node_state &= (~NODE_STATE_DRAIN); base_state &= NODE_STATE_BASE; + if ((base_state == NODE_STATE_IDLE) && + (node_ptr->node_state & NODE_STATE_DRAIN)) { + jobacct_g_node_up(node_ptr, now); + } + node_ptr->node_state &= (~NODE_STATE_DRAIN); if (base_state == NODE_STATE_DOWN) state_val = NODE_STATE_IDLE; else @@ -1042,11 +1056,20 @@ int update_node ( update_node_msg_t * update_node_msg ) if (state_val == NODE_STATE_DOWN) { /* We must set node DOWN before killing * its jobs */ - _make_node_down(node_ptr); + _make_node_down(node_ptr, now); kill_running_job_by_node_name (this_node_name, false); } else if (state_val == NODE_STATE_IDLE) { + base_state &= NODE_STATE_BASE; + if (base_state == NODE_STATE_DOWN) 
{ + trigger_node_up(node_ptr); + jobacct_g_node_up(node_ptr, now); + } else if ((base_state == NODE_STATE_IDLE) && + (node_ptr->node_state & NODE_STATE_DRAIN)) { + jobacct_g_node_up(node_ptr, now); + } + /* assume they want to clear DRAIN flag too */ node_ptr->node_state &= (~NODE_STATE_DRAIN); bit_set (avail_node_bitmap, node_inx); @@ -1065,6 +1088,10 @@ int update_node ( update_node_msg_t * update_node_msg ) bit_clear (avail_node_bitmap, node_inx); state_val = node_ptr->node_state | NODE_STATE_DRAIN; + if ((node_ptr->run_job_cnt == 0) && + (node_ptr->comp_job_cnt == 0)) + jobacct_g_node_down(node_ptr, now, + NULL); } else { info ("Invalid node state specified %d", @@ -1087,14 +1114,6 @@ int update_node ( update_node_msg_t * update_node_msg ) } } - if ((update_node_msg -> reason) && - (update_node_msg -> reason[0])) { - xfree(node_ptr->reason); - node_ptr->reason = xstrdup(update_node_msg->reason); - info ("update_node: node %s reason set to: %s", - this_node_name, node_ptr->reason); - } - base_state = node_ptr->node_state & NODE_STATE_BASE; if ((base_state != NODE_STATE_DOWN) && ((node_ptr->node_state & NODE_STATE_DRAIN) == 0)) @@ -1264,6 +1283,7 @@ extern int drain_nodes ( char *nodes, char *reason ) struct node_record *node_ptr; char *this_node_name ; hostlist_t host_list; + time_t now = time(NULL); if ((nodes == NULL) || (nodes[0] == '\0')) { error ("drain_nodes: invalid node name %s", nodes); @@ -1300,6 +1320,11 @@ extern int drain_nodes ( char *nodes, char *reason ) xfree(node_ptr->reason); node_ptr->reason = xstrdup(reason); + if ((node_ptr->run_job_cnt == 0) && + (node_ptr->comp_job_cnt == 0)) { + /* no jobs, node is drained */ + jobacct_g_node_down(node_ptr, now, NULL); + } select_g_update_node_state(node_inx, node_ptr->node_state); @@ -1500,6 +1525,7 @@ validate_node_specs (char *node_name, uint16_t cpus, node_ptr->last_idle = now; } xfree(node_ptr->reason); + jobacct_g_node_up(node_ptr, now); } else if ((base_state == NODE_STATE_DOWN) && (slurmctld_conf.ret2service == 1) && (node_ptr->reason != NULL) && @@ -1518,6 +1544,7 @@ validate_node_specs (char *node_name, uint16_t cpus, xfree(node_ptr->reason); reset_job_priority(); trigger_node_up(node_ptr); + jobacct_g_node_up(node_ptr, now); } else if ((base_state == NODE_STATE_ALLOCATED) && (job_count == 0)) { /* job vanished */ last_node_update = now; @@ -1691,6 +1718,7 @@ extern int validate_nodes_via_front_end(uint32_t job_count, node_ptr->last_idle = now; } xfree(node_ptr->reason); + jobacct_g_node_up(node_ptr, now); } else if ((base_state == NODE_STATE_DOWN) && (slurmctld_conf.ret2service == 1)) { updated_job = true; @@ -1712,6 +1740,7 @@ extern int validate_nodes_via_front_end(uint32_t job_count, node_ptr->name); xfree(node_ptr->reason); trigger_node_up(node_ptr); + jobacct_g_node_up(node_ptr, now); } else if ((base_state == NODE_STATE_ALLOCATED) && (jobs_on_node == 0)) { /* job vanished */ updated_job = true; @@ -1829,6 +1858,7 @@ static void _node_did_resp(struct node_record *node_ptr) last_node_update = now; node_ptr->last_idle = now; node_ptr->node_state = NODE_STATE_IDLE | node_flags; + jobacct_g_node_up(node_ptr, now); } if ((base_state == NODE_STATE_DOWN) && (slurmctld_conf.ret2service == 1) && @@ -1840,6 +1870,8 @@ static void _node_did_resp(struct node_record *node_ptr) info("node_did_resp: node %s returned to service", node_ptr->name); xfree(node_ptr->reason); + trigger_node_up(node_ptr); + jobacct_g_node_up(node_ptr, now); } base_state = node_ptr->node_state & NODE_STATE_BASE; if ((base_state == NODE_STATE_IDLE) @@ 
-1920,6 +1952,7 @@ static void _node_not_resp (struct node_record *node_ptr, time_t msg_time) void set_node_down (char *name, char *reason) { struct node_record *node_ptr; + time_t now = time(NULL); node_ptr = find_node_record (name); if (node_ptr == NULL) { @@ -1927,7 +1960,6 @@ void set_node_down (char *name, char *reason) return; } - _make_node_down(node_ptr); (void) kill_running_job_by_node_name(name, false); if ((node_ptr->reason == NULL) || (strncmp(node_ptr->reason, "Not responding", 14) == 0)) { @@ -1942,6 +1974,7 @@ void set_node_down (char *name, char *reason) node_ptr->reason = xstrdup(reason); xstrcat(node_ptr->reason, time_buf); } + _make_node_down(node_ptr, now); return; } @@ -2138,6 +2171,8 @@ extern void make_node_comp(struct node_record *node_ptr, if ((node_ptr->run_job_cnt == 0) && (node_ptr->comp_job_cnt == 0)) { bit_set(idle_node_bitmap, inx); + if (node_ptr->node_state & NODE_STATE_DRAIN) + jobacct_g_node_down(node_ptr, now, NULL); } if (base_state == NODE_STATE_DOWN) { @@ -2152,11 +2187,11 @@ extern void make_node_comp(struct node_record *node_ptr, } /* _make_node_down - flag specified node as down */ -static void _make_node_down(struct node_record *node_ptr) +static void _make_node_down(struct node_record *node_ptr, time_t event_time) { int inx = node_ptr - node_record_table_ptr; uint16_t node_flags; - + xassert(node_ptr); last_node_update = time (NULL); node_flags = node_ptr->node_state & NODE_STATE_FLAGS; @@ -2168,6 +2203,7 @@ static void _make_node_down(struct node_record *node_ptr) bit_clear (up_node_bitmap, inx); select_g_update_node_state(inx, node_ptr->node_state); trigger_node_down(node_ptr); + jobacct_g_node_down(node_ptr, event_time, NULL); } /* @@ -2240,6 +2276,7 @@ void make_node_idle(struct node_record *node_ptr, debug3("make_node_idle: Node %s is DRAINED", node_ptr->name); node_ptr->last_idle = now; + jobacct_g_node_down(node_ptr, now, NULL); } else if (node_ptr->run_job_cnt) { node_ptr->node_state = NODE_STATE_ALLOCATED | node_flags; } else { diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index 1baf6bf9d0b57353b7390e5f585a4ee72efdf954..16f7661360d4da19c34a06d64368893e596f2d99 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -2,7 +2,7 @@ * node_scheduler.c - select and allocate nodes to jobs * Note: there is a global node table (node_record_table_ptr) * - * $Id: node_scheduler.c 12863 2007-12-19 23:14:45Z jette $ + * $Id: node_scheduler.c 13061 2008-01-22 21:23:56Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
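The node_mgr.c hunks above log a node UP/DOWN event to the jobacct plugin only at real state transitions; in particular, a draining node is reported down only once it has no running or completing jobs. A toy illustration of that rule, with node_record reduced to the two counters involved (the struct and values here are made up):

#include <stdbool.h>
#include <stdio.h>

struct node_rec {
	int run_job_cnt;	/* jobs currently running on the node */
	int comp_job_cnt;	/* jobs still in completing state */
};

/* A DRAIN'ed node is recorded as down only when both counts reach zero. */
static bool drain_event_due(const struct node_rec *node)
{
	return (node->run_job_cnt == 0) && (node->comp_job_cnt == 0);
}

int main(void)
{
	struct node_rec busy = { 2, 1 };
	struct node_rec idle = { 0, 0 };

	printf("busy draining node logged now: %d\n", drain_event_due(&busy));
	printf("idle draining node logged now: %d\n", drain_event_due(&idle));
	return 0;
}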
@@ -61,6 +61,7 @@ #include "src/common/xassert.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" +#include "src/common/slurm_jobacct.h" #include "src/slurmctld/agent.h" #include "src/slurmctld/node_scheduler.h" @@ -183,7 +184,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, if (select_g_job_fini(job_ptr) != SLURM_SUCCESS) error("select_g_job_fini(%u): %m", job_ptr->job_id); - + agent_args = xmalloc(sizeof(agent_arg_t)); if (timeout) agent_args->msg_type = REQUEST_KILL_TIMELIMIT; @@ -225,6 +226,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, delete_step_records(job_ptr, 1); slurm_sched_schedule(); } + if (agent_args->node_count == 0) { error("Job %u allocated no nodes to be killed on", job_ptr->job_id); @@ -234,6 +236,11 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, xfree(agent_args); return; } + + /* log this in the accounting plugin since it was allocated + * something */ + jobacct_g_job_complete_slurmctld(job_ptr); + agent_args->msg_args = kill_job; agent_queue_request(agent_args); return; @@ -1153,6 +1160,8 @@ extern int select_nodes(struct job_record *job_ptr, bool test_only, if (job_ptr->mail_type & MAIL_JOB_BEGIN) mail_job_info(job_ptr, MAIL_JOB_BEGIN); + jobacct_g_job_start_slurmctld(job_ptr); + cleanup: if (select_node_bitmap) *select_node_bitmap = select_bitmap; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index d1a2649db79d911b9bef56a00c9485f7e9c6e285..8c7d733d77d0ce7627ab24206d2a1dd628d36674 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -1,7 +1,7 @@ /*****************************************************************************\ * slurmctld.h - definitions of functions and structures for slurmctld use * - * $Id: slurmctld.h 12863 2007-12-19 23:14:45Z jette $ + * $Id: slurmctld.h 13061 2008-01-22 21:23:56Z da $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
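The _slurmctld_background() hunk earlier in this patch re-reports the cluster CPU count whenever PERIODIC_NODE_ACCT seconds (defined in the slurmctld.h hunk below) have elapsed, so nodes added or reconfigured at runtime still reach the accounting database. A compressed sketch of that timer pattern, using made-up timestamps in place of the daemon's clock:

#include <stdio.h>
#include <time.h>

#define PERIODIC_NODE_ACCT 300	/* seconds between accounting reports */

int main(void)
{
	time_t last_node_acct = (time_t) 1200000000;	/* hypothetical */
	time_t now = last_node_acct + 301;

	if (difftime(now, last_node_acct) >= PERIODIC_NODE_ACCT) {
		last_node_acct = now;	/* restart the interval */
		printf("report cluster CPU count to the accounting plugin\n");
	}
	printf("next report due after %ld\n",
	       (long) (last_node_acct + PERIODIC_NODE_ACCT));
	return 0;
}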
@@ -115,6 +115,9 @@ /* Process pending trigger events every TRIGGER_INTERVAL seconds */ #define TRIGGER_INTERVAL 15 +/* Report current node accounting state every PERIODIC_NODE_ACCT seconds */ +#define PERIODIC_NODE_ACCT 300 + /* Pathname of group file record for checking update times */ #define GROUP_FILE "/etc/group" @@ -219,6 +222,7 @@ extern time_t last_node_update; /* time of last node record update */ extern int node_record_count; /* count in node_record_table_ptr */ extern bitstr_t *avail_node_bitmap; /* bitmap of available nodes, * not DOWN, DRAINED or DRAINING */ +extern uint32_t total_cpus; /* count of CPUs in the entire cluster */ extern bitstr_t *idle_node_bitmap; /* bitmap of idle nodes */ extern bitstr_t *share_node_bitmap; /* bitmap of sharable nodes */ extern bitstr_t *up_node_bitmap; /* bitmap of up nodes, not DOWN */ @@ -304,6 +308,8 @@ struct job_details { time_t begin_time; /* start at this time (srun --begin), * resets to time first eligible * (all dependencies satisfied) */ + uint32_t reserved_resources; /* CPU minutes of resources reserved + * for this job while it was pending */ char *work_dir; /* pathname of working directory */ char **argv; /* arguments for a batch job script */ uint16_t argc; /* count of argv elements */ diff --git a/src/squeue/print.c b/src/squeue/print.c index ff4c3ffe642c3106e89832bccd8e437f43ca5145..7aba3a523a5db6f4815a38a9a6178a8144d80507 100644 --- a/src/squeue/print.c +++ b/src/squeue/print.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * print.c - squeue print job functions - * $Id: print.c 12627 2007-11-06 19:48:55Z jette $ + * $Id: print.c 12951 2008-01-04 00:29:45Z jette $ ***************************************************************************** * Copyright (C) 2002 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
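The squeue hunk below guards _print_secs() against clock skew: an elapsed time that is negative, or longer than a year, prints as "INVALID" rather than as a nonsense duration. A self-contained sketch of the same guard; print_secs() is a simplified stand-in for the squeue function, not a copy of it.

#include <stdio.h>

/* Print seconds as D-HH:MM:SS, or INVALID when the value is negative
 * (clock skew) or implausibly large (more than one year). */
static void print_secs(long secs)
{
	if ((secs < 0) || (secs > (365L * 24 * 3600)))
		printf("INVALID\n");
	else
		printf("%ld-%2.2ld:%2.2ld:%2.2ld\n", secs / 86400,
		       (secs / 3600) % 24, (secs / 60) % 60, secs % 60);
}

int main(void)
{
	print_secs(-42);	/* skewed clock -> INVALID */
	print_secs(90061);	/* 1-01:01:01 */
	return 0;
}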
@@ -210,7 +210,9 @@ int _print_secs(long time, int width, bool right, bool cut_output) hours = (time / 3600) % 24; days = time / 86400; - if (days) + if ((time < 0) || (time > (365 * 24 * 3600))) + snprintf(str, FORMAT_STRING_SIZE, "INVALID"); + else if (days) snprintf(str, FORMAT_STRING_SIZE, "%ld-%2.2ld:%2.2ld:%2.2ld", days, hours, minutes, seconds); diff --git a/testsuite/expect/test7.7.prog.c b/testsuite/expect/test7.7.prog.c index 982787a195b4a2a506df7696e9d4327242659a13..438a76100b68cea4e4dae6484e30eacd2fd89355 100644 --- a/testsuite/expect/test7.7.prog.c +++ b/testsuite/expect/test7.7.prog.c @@ -56,7 +56,7 @@ static int _conn_wiki_port(char *host, int port) bzero((char *) &wiki_addr, sizeof(wiki_addr)); wiki_addr.sin_family = AF_INET; wiki_addr.sin_port = htons(port); - memcpy(&wiki_addr.sin_addr.s_addr, hptr->h_addr, hptr->h_length); + memcpy(&wiki_addr.sin_addr.s_addr, hptr->h_addr, hptr->h_length); sock_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (connect(sock_fd, (struct sockaddr *) &wiki_addr, sizeof(wiki_addr))) { @@ -282,7 +282,9 @@ static void _cancel_job(long my_job_id) char out_msg[128]; snprintf(out_msg, sizeof(out_msg), - "TS=%u AUTH=root DT=CMD=CANCELJOB ARG=%ld TYPE=ADMIN", + "TS=%u AUTH=root DT=CMD=CANCELJOB ARG=%ld " + "TYPE=ADMIN " + "COMMENT=\"cancel comment\" ", (uint32_t) now, my_job_id); _xmit(out_msg); } @@ -293,8 +295,9 @@ static void _start_job(long my_job_id) char out_msg[128]; snprintf(out_msg, sizeof(out_msg), - "TS=%u AUTH=root DT=CMD=STARTJOB ARG=%ld TASKLIST=", - /* Empty TASKLIST means we don't care */ + "TS=%u AUTH=root DT=CMD=STARTJOB ARG=%ld " + "COMMENT=\'start comment\' " + "TASKLIST=", /* Empty TASKLIST means we don't care */ (uint32_t) now, my_job_id); _xmit(out_msg); } @@ -328,6 +331,7 @@ static void _modify_job(long my_job_id) snprintf(out_msg, sizeof(out_msg), "TS=%u AUTH=root DT=CMD=MODIFYJOB ARG=%ld " + /* "MINSTARTTIME=55555 " */ /* "JOBNAME=foo " */ /* "PARTITION=pdebug " */ /* "NODES=2 " */