From f04daf0b8bb6511743e0d6a65a8f02a2261bd2af Mon Sep 17 00:00:00 2001 From: Mehdi Dogguy <mehdi@debian.org> Date: Mon, 8 Sep 2014 21:31:07 +0200 Subject: [PATCH] Imported Upstream version 1.2.25 --- META | 4 +- NEWS | 27 +- doc/man/man1/sacct.1 | 1 - doc/man/man1/salloc.1 | 35 +- doc/man/man1/sbatch.1 | 6 +- doc/man/man1/sbcast.1 | 12 +- doc/man/man1/sinfo.1 | 2 +- doc/man/man1/slaunch.1 | 14 +- doc/man/man1/srun.1 | 16 +- doc/man/man1/strigger.1 | 38 +- doc/man/man3/slurm_kill_job.3 | 1 - slurm.spec | 6 +- src/api/job_info.c | 20 +- src/plugins/jobacct/common/common_slurmctld.c | 2 +- src/plugins/jobacct/gold/Makefile.am | 6 +- src/plugins/jobacct/gold/Makefile.in | 11 +- src/plugins/jobacct/gold/agent.c | 675 ++++++++++++++ src/plugins/jobacct/gold/agent.h | 170 ++++ src/plugins/jobacct/gold/jobacct_gold.c | 820 +++++++++++------- src/plugins/mpi/mvapich/mvapich.c | 13 +- .../select/bluegene/plugin/select_bluegene.c | 9 +- src/sacct/options.c | 42 +- src/sacct/print.c | 56 ++ src/sacct/sacct.c | 1 + src/sacct/sacct.h | 3 + src/salloc/opt.c | 52 +- src/salloc/opt.h | 8 +- src/salloc/salloc.c | 31 +- src/sbatch/opt.c | 5 + src/sbcast/agent.c | 11 +- src/sbcast/opts.c | 14 +- src/sbcast/sbcast.c | 12 +- src/sbcast/sbcast.h | 6 +- src/scontrol/update_job.c | 4 + src/slurmctld/controller.c | 5 +- src/slurmctld/job_mgr.c | 22 +- src/slurmctld/job_scheduler.c | 20 +- src/slurmctld/node_mgr.c | 6 +- src/slurmctld/node_scheduler.c | 9 +- src/slurmctld/step_mgr.c | 5 +- src/slurmd/slurmstepd/mgr.c | 15 +- src/srun/opt.c | 7 +- src/srun/opt.h | 4 +- 43 files changed, 1807 insertions(+), 419 deletions(-) create mode 100644 src/plugins/jobacct/gold/agent.c create mode 100644 src/plugins/jobacct/gold/agent.h diff --git a/META b/META index 55d315864..32ed21c29 100644 --- a/META +++ b/META @@ -3,9 +3,9 @@ Api_revision: 0 Major: 1 Meta: 1 - Micro: 24 + Micro: 25 Minor: 2 Name: slurm Release: 1 Release_tags: - Version: 1.2.24 + Version: 1.2.25 diff --git a/NEWS b/NEWS index 195b78d72..e50a73535 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,31 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 1.2.25 +========================= + -- Bug fix for setting exit code in accounting for batch script. + -- Add salloc option, --no-shell (for LSF). + -- Added new options for sacct output + -- mvapich: Ensure MPIRUN_ID is unique for all job steps within a job. + (Fixes crashes when running multiple job steps within a job on one node) + -- Prevent "scontrol show job" from failing with buffer overflow when a job + has a very long Comment field. + -- Make certain that a job step is purged when a job has been completed. + Previous versions could have the job step persist if an allocated node + went DOWN and the slurmctld restarted. + -- Fix bug in sbcast that can cause communication problems for large files. + -- Add sbcast option -t/--timeout and SBCAST_TIMEOUT environment variable + to control message timeout. + -- Add threaded agent to manage a queue of Gold update requests for + performance reasons. + -- Add salloc options --chdir and --get-user-env (for Moab). + -- Modify scontrol update to support job comment changes. + -- Do not clear a DRAINED node's reason field when slurmctld restarts. + -- Do not cancel a pending job if Moab or Maui try to start it on unusable nodes. + Leave the job queued. 
+ -- Add --requeue option to srun and sbatch (these undocumented options have no + effect in slurm v1.2, but are legitimate options in slurm v1.3). + * Changes in SLURM 1.2.24 ========================= -- In sched/wiki and sched/wiki2, support non-zero UPDATE_TIME specification @@ -2826,4 +2851,4 @@ documents those changes that are of interest to users and admins. -- Change directory to /tmp in slurmd if daemonizing. -- Logfiles are reopened on reconfigure. -$Id: NEWS 13393 2008-02-27 23:07:43Z da $ +$Id: NEWS 13623 2008-03-17 16:46:23Z jette $ diff --git a/doc/man/man1/sacct.1 b/doc/man/man1/sacct.1 index a441a1dd8..83ebea364 100644 --- a/doc/man/man1/sacct.1 +++ b/doc/man/man1/sacct.1 @@ -1,5 +1,4 @@ .\" $Header$ -.TA "s" .TH sacct 1 .ds )H "Hewlett\-Packard Company .SH "NAME" diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index fd07da0bd..15597c132 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -1,15 +1,17 @@ -.TH "salloc" "1" "SLURM 1.2" "August 2007" "SLURM Commands" +.TH "salloc" "1" "SLURM 1.2" "February 2008" "SLURM Commands" .SH "NAME" .LP salloc \- Obtain a SLURM job allocation (a set of nodes), execute a command, and then release the allocation when the command is finished. .SH "SYNOPSIS" .LP salloc [\fIoptions\fP] [<\fIcommand\fP> [\fIcommand args\fR]] + .SH "DESCRIPTION" .LP salloc is used to allocate a SLURM job allocation, which is a set of resources (nodes), possibly with some set of constraints (e.g. number of processors per node). When salloc successfully obtains the requested allocation, it then runs the command specified by the user. Finally, when the user specified command is complete, salloc relinquishes the job allocation. The command may be any program the user wishes. Some typical commands are xterm, a shell script containing srun commands, and srun (see the EXAMPLES section). + .SH "OPTIONS" .LP @@ -82,6 +84,10 @@ the \-\-cpus\-per\-task=3 options, the controller knows that each task requires 3 processors on the same node, and the controller will grant an allocation of 4 nodes, one for each of the 4 tasks. +.TP +\fB\-D\fR, \fB\-\-chdir\fR=\fIpath\fR +change directory to \fIpath\fR before beginning execution. + .TP \fB\-d\fR, \fB\-\-dependency\fR[=]<\fIjobid\fR> Defer the start of this job until the specified \fIjobid\fR has completed. @@ -104,6 +110,28 @@ in the file. Duplicate node names in the file will be ignored. The order of the node names in the list is not important; the node names will be sorted my SLURM. +.TP +\fB\-\-get\-user\-env\fR[=\fItimeout\fR][\fImode\fR] +This option will load login environment variables for the user specified +in the \fB\-\-uid\fR option. +The environment variables are retrieved by running something of this sort +"su - <username> -c /usr/bin/env" and parsing the output. +Be aware that any environment variables already set in salloc's environment +will take precedence over any environment variables in the user's +login environment. +The optional \fItimeout\fR value is in seconds. Default value is 3 seconds. +The optional \fImode\fR value control the "su" options. +With a \fImode\fR value of "S", "su" is executed without the "\-" option. +With a \fImode\fR value of "L", "su" is executed with the "\-" option, +replicating the login environment. +If \fImode\fR not specified, the mode established at SLURM build time +is used. +Example of use include "\-\-get\-user\-env", "\-\-get\-user\-env=10" +"\-\-get\-user\-env=10L", and "\-\-get\-user\-env=S". 
+NOTE: This option only works if the caller has an +effective uid of "root". +This option was originally created for use by Moab. + .TP \fB\-\-gid\fR[=]<\fIgroup\fR> If \fBsalloc\fR is run as root, and the \fB\-\-gid\fR option is used, @@ -230,6 +258,11 @@ ignored if \fISchedulerType=sched/wiki\fR or \fB\-\-no\-bell\fR Silence salloc's use of the terminal bell. Also see the option \fB\-\-bell\fR. +.TP +\fB\-\-no\-shell\fR +immediately exit after allocating resources instead of spawning a +shell when used with the \fB\-A\fR, \fB\-\-allocate\fR option. + .TP \fB\-O\fR, \fB\-\-overcommit\fR Overcommit resources. Normally, \fBsbatch\fR will allocate one cpu per diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 08b034a81..9971fdfc1 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -123,7 +123,7 @@ will be sorted my SLURM. This option will tell sbatch to retrieve the login environment variables for the user specified in the \fB\-\-uid\fR option. The environment variables are retrieved by running something of this sort -"su - <username> -c /usr/bin/env" and parsing the output. +"su \- <username> \-c /usr/bin/env" and parsing the output. Be aware that any environment variables already set in sbatch's environment will take precedence over any environment variables in the user's login environment. @@ -482,7 +482,7 @@ Force the allocated nodes to reboot before starting the job. .TP \fB\-\-wrap\fR[=]<\fIcommand string\fR> Sbatch will wrap the specified command string in a simple "sh" shell script, -and submit that script to the slurm controller. When --wrap is used, +and submit that script to the slurm controller. When \-\-wrap is used, a script name and arguments may not be specified on the command line; instead the sbatch-generated wrapper script is used. @@ -583,7 +583,7 @@ $ cat myscript .br #!/bin/sh .br -#SBATCH --time=1 +#SBATCH \-\-time=1 .br srun hostname |sort .br diff --git a/doc/man/man1/sbcast.1 b/doc/man/man1/sbcast.1 index acd4cf558..5984ab394 100644 --- a/doc/man/man1/sbcast.1 +++ b/doc/man/man1/sbcast.1 @@ -4,7 +4,7 @@ sbcast \- transmit a file to the nodes allocated to a SLURM job. .SH "SYNOPSIS" -\fBsbcast\fR [\-CfpsvV] SOURCE DEST +\fBsbcast\fR [\-CfpstvV] SOURCE DEST .SH "DESCRIPTION" \fBsbcast\fR is used to transmit a file to all nodes allocated @@ -44,6 +44,13 @@ This size subject to rounding and range limits to maintain good performance. This value may need to be set on systems with very limited memory. .TP +\fB\-t\fB \fIseconds\fR, fB\-\-timeout\fR=\fIseconds\fR +Specify the message timeout in seconds. +The default value is \fIMessageTimeout\fR as reported by +"scontrol show config". +Setting a higher value may be necessitated by relatively slow +I/O performance on the compute node disks. +.TP \fB\-v\fR, \fB\-\-verbose\fR Provide detailed event logging through program execution. .TP @@ -71,6 +78,9 @@ these settings.) 
.TP \fBSBCAST_SIZE\fR \fB\-s\fR \fIsize\fR, \fB\-\-size\fR=\fIsize\fR +.TP +\fBSBCAST_TIMEOUT\fR +\fB\-t\fB \fIseconds\fR, fB\-\-timeout\fR=\fIseconds\fR .SH "EXAMPLE" diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index 03af1c5aa..ae8e1c2ee 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -454,7 +454,7 @@ debug* up 30:00 0/8/0/8 adev[0-7] Report more complete information about the partition debug: .nf -> sinfo --long --partition=debug +> sinfo \-\-long \-\-partition=debug PARTITION AVAIL TIMELIMIT JOB_SIZE ROOT SHARE GROUPS NODES STATE NODELIST debug* up 30:00 8 no no all 8 idle dev[0-7] .fi diff --git a/doc/man/man1/slaunch.1 b/doc/man/man1/slaunch.1 index 823177200..d227eec4a 100644 --- a/doc/man/man1/slaunch.1 +++ b/doc/man/man1/slaunch.1 @@ -1,4 +1,4 @@ -.\" $Id: slaunch.1 12842 2007-12-17 19:33:54Z jette $ +.\" $Id: slaunch.1 13559 2008-03-11 22:41:30Z jette $ .TH "slaunch" "1" "SLURM 1.2" "October 2006" "SLURM Commands" .SH "NAME" .LP @@ -270,7 +270,11 @@ The options include a number representing the size of the task block. This is followed by an optional specification of the task distribution scheme within a block of tasks and between the blocks of tasks. For more details (including examples and diagrams), please see -https://computing.llnl.gov/linux/slurm/mc_support.html and +.br +https://computing.llnl.gov/linux/slurm/mc_support.html +.br +and +.br https://computing.llnl.gov/linux/slurm/dist_plane.html. .TP .B hostfile @@ -598,8 +602,8 @@ Same as \fB\-d\fR or \fB\-\-slurmd\-debug\fR .PP slaunch will set the following environment variables which will appear in the environments of all tasks in the job step. Since slaunch -sets these variables itself, they will also be available to --prolog -and --epilog scripts. (Notice that the "backwards compatibility" environment +sets these variables itself, they will also be available to \-\-prolog +and \-\-epilog scripts. (Notice that the "backwards compatibility" environment variables clobber some of the variables that were set by salloc or sbatch at job allocation time. The newer SLURM_JOB_* and SLURM_STEP_* names do not conflict, so any task in any job step can easily determine the parameters @@ -628,7 +632,7 @@ The number of tasks on each node in the job step. Additionally, SLURM daemons will ensure that the the following variables are set in the environments of all tasks in the job step. Many of the following variables will have different values in each task's environment. (These -variables are not available to the slaunch --prolog and --epilog scripts.) +variables are not available to the slaunch \-\-prolog and \-\-epilog scripts.) .TP \fBSLURM_NODEID\fR diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index e58121e2c..117209122 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -1,4 +1,4 @@ -\." $Id: srun.1 13256 2008-02-12 22:02:36Z jette $ +.\" $Id: srun.1 13559 2008-03-11 22:41:30Z jette $ .\" .TH SRUN "1" "July 2007" "srun 1.2" "slurm components" @@ -285,7 +285,7 @@ even if consumable resources are enabled For a batch script submission, this option will tell srun to retrieve the login environment variables for the user specified in the \fB\-\-uid\fR option. The environment variables are retrieved by running something of this sort -"su - <username> -c /usr/bin/env" and parsing the output. +"su \- <username> \-c /usr/bin/env" and parsing the output. 
Be aware that any environment variables already set in srun's environment will take precedence over any environment variables in the user's login environment. @@ -425,7 +425,11 @@ The options include a number representing the size of the task block. This is followed by an optional specification of the task distribution scheme within a block of tasks and between the blocks of tasks. For more details (including examples and diagrams), please see -https://computing.llnl.gov/linux/slurm/mc_support.html and +.br +https://computing.llnl.gov/linux/slurm/mc_support.html +.br +and +.br https://computing.llnl.gov/linux/slurm/dist_plane.html. .TP .B arbitrary @@ -434,7 +438,7 @@ listed in file designated by the environment variable SLURM_HOSTFILE. If this variable is listed it will over ride any other method specified. If not set the method will default to block. Inside the hostfile must contain at minimum the number of hosts requested. If requesting tasks -(-n) your tasks will be laid out on the nodes in the order of the file. +(\-n) your tasks will be laid out on the nodes in the order of the file. .RE .TP @@ -901,7 +905,7 @@ Request a specific list of hosts. The job will contain \fIat least\fR these hosts. The list may be specified as a comma\-separated list of hosts, a range of hosts (host[1\-5,7,...] for example), or a filename. The host list will be assumed to be a filename if it contains a "/" -character. If you specify a max node count (-N1-2) if there are more +character. If you specify a max node count (\-N1\-2) if there are more than 2 hosts in the file only the first 2 nodes will be used in the request list. @@ -1459,7 +1463,7 @@ by these various MPI implementation. 1. SLURM directly launches the tasks and performs initialization of communications (Quadrics MPI, MPICH2, MPICH-GM, MVAPICH, MVAPICH2 -and some MPICH1 modes). For example: "srun -n16 a.out". +and some MPICH1 modes). For example: "srun \-n16 a.out". 2. SLURM creates a resource allocation for the job and then mpirun launches tasks using SLURM's infrastructure (OpenMPI, diff --git a/doc/man/man1/strigger.1 b/doc/man/man1/strigger.1 index 82de9dfbb..5d06e3a6c 100644 --- a/doc/man/man1/strigger.1 +++ b/doc/man/man1/strigger.1 @@ -4,11 +4,11 @@ strigger \- Used set, get or clear Slurm trigger information. .SH "SYNOPSIS" -\fBstrigger --set\fR [\fIOPTIONS\fR...] +\fBstrigger \-\-set\fR [\fIOPTIONS\fR...] .br -\fBstrigger --get\fR [\fIOPTIONS\fR...] +\fBstrigger \-\-get\fR [\fIOPTIONS\fR...] .br -\fBstrigger --clear\fR [\fIOPTIONS\fR...] +\fBstrigger \-\-clear\fR [\fIOPTIONS\fR...] .SH "DESCRIPTION" \fBstrigger\fR is used to set, get or clear Slurm trigger information. @@ -182,13 +182,13 @@ any node in the cluster goes down. > cat /usr/sbin/slurm_admin_notify #!/bin/bash # Submit trigger for next event - strigger --set --node --down \\ - --program=/usr/sbin/slurm_admin_notify - # Notify administrator using by e-mail - /bin/mail slurm_admin@site.com -s NodeDown + strigger \-\-set \-\-node \-\-down \\ + \-\-program=/usr/sbin/slurm_admin_notify + # Notify administrator using by e\-mail + /bin/mail slurm_admin@site.com \-s NodeDown - > strigger --set --node --down \\ - --program=/usr/sbin/slurm_admin_notify + > strigger \-\-set \-\-node \-\-down \\ + \-\-program=/usr/sbin/slurm_admin_notify .fi .PP @@ -197,8 +197,8 @@ any node in the cluster remains in the idle state for at least 600 seconds. 
.nf - > strigger --set --node --idle --offset=600 \\ - --program=/usr/sbin/slurm_suspend_node + > strigger \-\-set \-\-node \-\-idle \-\-offset=600 \\ + \-\-program=/usr/sbin/slurm_suspend_node .fi .PP @@ -206,8 +206,8 @@ Execute the program "/home/joe/clean_up" when job 1234 is within 10 minutes of reaching its time limit. .nf - > strigger --set --jobid=1234 --time --offset=-600 \\ - --program=/home/joe/clean_up + > strigger \-\-set \-\-jobid=1234 \-\-time \-\-offset=-600 \\ + \-\-program=/home/joe/clean_up .fi .PP @@ -215,17 +215,17 @@ Execute the program "/home/joe/node_died" when any node allocated to job 1234 enters the DOWN state. .nf - > strigger --set --jobid=1234 --down \\ - --program=/home/joe/node_died + > strigger \-\-set \-\-jobid=1234 \-\-down \\ + \-\-program=/home/joe/node_died .fi .PP Show all triggers associated with job 1235. .nf - > strigger --get --jobid=1235 + > strigger \-\-get \-\-jobid=1235 TRIG_ID RES_TYPE RES_ID TYPE OFFSET USER PROGRAM - 123 job 1235 time -600 joe /home/bob/clean_up + 123 job 1235 time \-600 joe /home/bob/clean_up 125 job 1235 down 0 joe /home/bob/node_died .fi @@ -233,14 +233,14 @@ Show all triggers associated with job 1235. Delete event trigger 125. .fp - > strigger --clear --id=125 + > strigger \-\-clear \-\-id=125 .fi .PP Execute /home/joe/job_fini upon completion of job 1237. .fp - > strigger --set --jobid=1237 --fini --program=/home/joe/job_fini + > strigger \-\-set \-\-jobid=1237 \-\-fini \-\-program=/home/joe/job_fini .fi .SH "COPYING" diff --git a/doc/man/man3/slurm_kill_job.3 b/doc/man/man3/slurm_kill_job.3 index c3e827a9a..3b2e44702 100644 --- a/doc/man/man3/slurm_kill_job.3 +++ b/doc/man/man3/slurm_kill_job.3 @@ -26,7 +26,6 @@ int \fBslurm_kill_job_step\fR ( ); .SH "ARGUMENTS" .LP -.TB \fIbatch_flag\fP If non\-zero then signal only the batch job shell. .TP diff --git a/slurm.spec b/slurm.spec index 3e8c10fde..9c83f1e97 100644 --- a/slurm.spec +++ b/slurm.spec @@ -60,14 +60,14 @@ %endif Name: slurm -Version: 1.2.24 +Version: 1.2.25 Release: 1 Summary: Simple Linux Utility for Resource Management License: GPL Group: System Environment/Base -Source: slurm-1.2.24.tar.bz2 +Source: slurm-1.2.25.tar.bz2 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} URL: https://computing.llnl.gov/linux/slurm/ BuildRequires: openssl-devel >= 0.9.6 openssl >= 0.9.6 @@ -212,7 +212,7 @@ SLURM process tracking plugin for SGI job containers. ############################################################################# %prep -%setup -n slurm-1.2.24 +%setup -n slurm-1.2.25 %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ diff --git a/src/api/job_info.c b/src/api/job_info.c index 829c6e329..a71cdde15 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * job_info.c - get/print the job state information of slurm - * $Id: job_info.c 12627 2007-11-06 19:48:55Z jette $ + * $Id: job_info.c 13465 2008-03-04 16:51:08Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -127,7 +127,7 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) char time_str[32], select_buf[122]; struct group *group_info = NULL; char tmp1[128], tmp2[128]; - char tmp_line[128]; + char tmp_line[512]; char *ionodes = NULL; uint16_t exit_status = 0, term_sig = 0; char *out = NULL; @@ -230,12 +230,12 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) xstrfmtcat(out, "%s=", nodelist); xstrcat(out, job_ptr->nodes); if(ionodes) { - sprintf(tmp_line, "[%s]", ionodes); + snprintf(tmp_line, sizeof(tmp_line), "[%s]", ionodes); xstrcat(out, tmp_line); xfree(ionodes); } - sprintf(tmp_line, " %sIndices=", nodelist); + snprintf(tmp_line, sizeof(tmp_line), " %sIndices=", nodelist); xstrcat(out, tmp_line); for (j = 0; (job_ptr->node_inx && (job_ptr->node_inx[j] != -1)); j+=2) { @@ -307,10 +307,12 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) #ifdef HAVE_BG convert_num_unit((float)job_ptr->num_nodes, tmp2, sizeof(tmp2), UNIT_NONE); - sprintf(tmp_line, "ReqProcs=%s MinBPs=%s ", tmp1, tmp2); + snprintf(tmp_line, sizeof(tmp_line), "ReqProcs=%s MinBPs=%s ", + tmp1, tmp2); #else _sprint_range(tmp2, job_ptr->num_nodes, job_ptr->max_nodes); - sprintf(tmp_line, "ReqProcs=%s ReqNodes=%s ", tmp1, tmp2); + snprintf(tmp_line, sizeof(tmp_line), "ReqProcs=%s ReqNodes=%s ", + tmp1, tmp2); #endif xstrcat(out, tmp_line); @@ -422,7 +424,8 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) /****** Line 14 ******/ slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str, sizeof(time_str)); - sprintf(tmp_line, "SubmitTime=%s ", time_str); + snprintf(tmp_line, sizeof(tmp_line), "SubmitTime=%s ", + time_str); xstrcat(out, tmp_line); if (job_ptr->suspend_time) { slurm_make_time_str ((time_t *)&job_ptr->suspend_time, @@ -440,7 +443,8 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) xstrcat(out, " "); else xstrcat(out, "\n "); - sprintf(tmp_line, "Comment=%s ", job_ptr->comment); + snprintf(tmp_line, sizeof(tmp_line), "Comment=%s ", + job_ptr->comment); xstrcat(out, tmp_line); } diff --git a/src/plugins/jobacct/common/common_slurmctld.c b/src/plugins/jobacct/common/common_slurmctld.c index 872a756ca..26da06484 100644 --- a/src/plugins/jobacct/common/common_slurmctld.c +++ b/src/plugins/jobacct/common/common_slurmctld.c @@ -131,7 +131,7 @@ static int _print_record(struct job_record *job_ptr, job_ptr->user_id, job_ptr->group_id, block_id, data) < 0) rc=SLURM_ERROR; -#ifdef HAVE_FDATSYNC +#ifdef HAVE_FDATASYNC fdatasync(LOGFILE_FD); #endif slurm_mutex_unlock( &logfile_lock ); diff --git a/src/plugins/jobacct/gold/Makefile.am b/src/plugins/jobacct/gold/Makefile.am index 1602fd4b4..8352b6cfe 100644 --- a/src/plugins/jobacct/gold/Makefile.am +++ b/src/plugins/jobacct/gold/Makefile.am @@ -8,9 +8,11 @@ PLUGIN_FLAGS = -module -avoid-version --export-dynamic INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = jobacct_gold.la -jobacct_gold_la_SOURCES = jobacct_gold.c \ +jobacct_gold_la_SOURCES = \ + agent.c agent.h \ + base64.c base64.h \ gold_interface.c gold_interface.h \ - base64.c base64.h + jobacct_gold.c jobacct_gold_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) diff --git a/src/plugins/jobacct/gold/Makefile.in b/src/plugins/jobacct/gold/Makefile.in index 1419248a0..a7710977a 100644 --- a/src/plugins/jobacct/gold/Makefile.in +++ b/src/plugins/jobacct/gold/Makefile.in @@ -74,8 +74,8 @@ am__installdirs = "$(DESTDIR)$(pkglibdir)" pkglibLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(pkglib_LTLIBRARIES) jobacct_gold_la_LIBADD = 
-am_jobacct_gold_la_OBJECTS = jobacct_gold.lo gold_interface.lo \ - base64.lo +am_jobacct_gold_la_OBJECTS = agent.lo base64.lo gold_interface.lo \ + jobacct_gold.lo jobacct_gold_la_OBJECTS = $(am_jobacct_gold_la_OBJECTS) jobacct_gold_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ @@ -259,9 +259,11 @@ PLUGIN_FLAGS = -module -avoid-version --export-dynamic # Gold job completion logging plugin. INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/src/common pkglib_LTLIBRARIES = jobacct_gold.la -jobacct_gold_la_SOURCES = jobacct_gold.c \ +jobacct_gold_la_SOURCES = \ + agent.c agent.h \ + base64.c base64.h \ gold_interface.c gold_interface.h \ - base64.c base64.h + jobacct_gold.c jobacct_gold_la_LDFLAGS = $(SO_LDFLAGS) $(PLUGIN_FLAGS) all: all-am @@ -333,6 +335,7 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/agent.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/base64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gold_interface.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/jobacct_gold.Plo@am__quote@ diff --git a/src/plugins/jobacct/gold/agent.c b/src/plugins/jobacct/gold/agent.c new file mode 100644 index 000000000..5e4ce97a5 --- /dev/null +++ b/src/plugins/jobacct/gold/agent.c @@ -0,0 +1,675 @@ +/****************************************************************************\ + * agent.c - Agent to queue and process pending Gold requests + * Largely copied from src/common/slurmdbd_defs.c in Slurm v1.3 + ***************************************************************************** + * Copyright (C) 2008 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Morris Jette <jette1@llnl.gov> + * UCRL-CODE-226842. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include <arpa/inet.h> +#include <fcntl.h> +#include <pthread.h> +#include <stdio.h> +#include <syslog.h> +#include <sys/poll.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "agent.h" +#include "slurm/slurm_errno.h" +#include "src/common/fd.h" +#include "src/common/pack.h" +#include "src/common/slurm_auth.h" +#include "src/common/slurm_protocol_api.h" +#include "src/common/xmalloc.h" +#include "src/common/xsignal.h" +#include "src/common/xstring.h" + +#define _DEBUG 0 +#define GOLD_MAGIC 0xDEAD3219 +#define MAX_AGENT_QUEUE 10000 +#define MAX_GOLD_MSG_LEN 16384 + +static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER; +static List agent_list = (List) NULL; +static pthread_t agent_tid = 0; +static time_t agent_shutdown = 0; + +static void * _agent(void *x); +static void _agent_queue_del(void *x); +static void _create_agent(void); +static Buf _load_gold_rec(int fd); +static void _load_gold_state(void); +static int _process_msg(Buf buffer); +static int _save_gold_rec(int fd, Buf buffer); +static void _save_gold_state(void); +static void _sig_handler(int signal); +static void _shutdown_agent(void); + +/**************************************************************************** + * External APIs for use by jobacct_gold.c + ****************************************************************************/ + +/* Initiated a Gold message agent. Recover any saved RPCs. */ +extern int gold_agent_init(void) +{ + slurm_mutex_lock(&agent_lock); + if ((agent_tid == 0) || (agent_list == NULL)) + _create_agent(); + slurm_mutex_unlock(&agent_lock); + + return SLURM_SUCCESS; +} + +/* Terminate a Gold message agent. Save any pending RPCs. */ +extern int gold_agent_fini(void) +{ + /* NOTE: agent_lock not needed for _shutdown_agent() */ + _shutdown_agent(); + + return SLURM_SUCCESS; +} + +/* Send an RPC to the Gold. Do not wait for the reply. The RPC + * will be queued and processed later if Gold is not responding. 
+ * Returns SLURM_SUCCESS or an error code */ +extern int gold_agent_xmit(gold_agent_msg_t *req) +{ + Buf buffer; + int cnt, rc = SLURM_SUCCESS; + static time_t syslog_time = 0; + + buffer = init_buf(MAX_GOLD_MSG_LEN); + pack16(req->msg_type, buffer); + switch (req->msg_type) { + case GOLD_MSG_CLUSTER_PROCS: + gold_agent_pack_cluster_procs_msg( + (gold_cluster_procs_msg_t *) req->data, buffer); + break; + case GOLD_MSG_JOB_COMPLETE: + gold_agent_pack_job_info_msg( + (gold_job_info_msg_t *) req->data, buffer); + break; + case GOLD_MSG_JOB_START: + gold_agent_pack_job_info_msg( + (gold_job_info_msg_t *) req->data, buffer); + break; + case GOLD_MSG_NODE_DOWN: + gold_agent_pack_node_down_msg( + (gold_node_down_msg_t *) req->data, buffer); + break; + case GOLD_MSG_NODE_UP: + gold_agent_pack_node_up_msg( + (gold_node_up_msg_t *) req->data, buffer); + break; + case GOLD_MSG_STEP_START: + gold_agent_pack_job_info_msg( + (gold_job_info_msg_t *) req->data, buffer); + break; + default: + error("gold: Invalid message send type %u", + req->msg_type); + free_buf(buffer); + return SLURM_ERROR; + } + + slurm_mutex_lock(&agent_lock); + if ((agent_tid == 0) || (agent_list == NULL)) { + _create_agent(); + if ((agent_tid == 0) || (agent_list == NULL)) { + slurm_mutex_unlock(&agent_lock); + free_buf(buffer); + return SLURM_ERROR; + } + } + cnt = list_count(agent_list); +#if _DEBUG + info("gold agent: queuing msg_type %u queue_len %d", + req->msg_type, cnt); +#endif + if ((cnt >= (MAX_AGENT_QUEUE / 2)) && + (difftime(time(NULL), syslog_time) > 120)) { + /* Log critical error every 120 seconds */ + syslog_time = time(NULL); + error("gold: agent queue filling, RESTART GOLD NOW"); + syslog(LOG_CRIT, "*** RESTART GOLD NOW ***"); + } + if (cnt < MAX_AGENT_QUEUE) { + if (list_enqueue(agent_list, buffer) == NULL) + fatal("list_enqueue: memory allocation failure"); + } else { + error("gold: agent queue is full, discarding request"); + rc = SLURM_ERROR; + } + slurm_mutex_unlock(&agent_lock); + pthread_cond_broadcast(&agent_cond); + return rc; +} + +/**************************************************************************** + * Functions for agent to manage queue of pending message for Gold + ****************************************************************************/ +static void _create_agent(void) +{ + if (agent_list == NULL) { + agent_list = list_create(_agent_queue_del); + if (agent_list == NULL) + fatal("list_create: malloc failure"); + _load_gold_state(); + } + + if (agent_tid == 0) { + pthread_attr_t agent_attr; + slurm_attr_init(&agent_attr); + pthread_attr_setdetachstate(&agent_attr, + PTHREAD_CREATE_DETACHED); + if (pthread_create(&agent_tid, &agent_attr, _agent, NULL) || + (agent_tid == 0)) + fatal("pthread_create: %m"); + } +} + +static void _agent_queue_del(void *x) +{ + Buf buffer = (Buf) x; + free_buf(buffer); +} + +static void _shutdown_agent(void) +{ + int i; + + if (agent_tid) { + agent_shutdown = time(NULL); + pthread_cond_broadcast(&agent_cond); + for (i=0; ((i<10) && agent_tid); i++) { + sleep(1); + pthread_cond_broadcast(&agent_cond); + if (pthread_kill(agent_tid, SIGUSR1)) + agent_tid = 0; + } + if (agent_tid) { + error("gold: agent failed to shutdown gracefully"); + } else + agent_shutdown = 0; + } +} + +static void *_agent(void *x) +{ + int cnt, rc; + Buf buffer; + struct timespec abs_time; + static time_t fail_time = 0; + int sigarray[] = {SIGUSR1, 0}; + + /* Prepare to catch SIGUSR1 to interrupt pending + * I/O and terminate in a timely fashion. 
*/ + xsignal(SIGUSR1, _sig_handler); + xsignal_unblock(sigarray); + + while (agent_shutdown == 0) { + slurm_mutex_lock(&agent_lock); + if (agent_list) + cnt = list_count(agent_list); + else + cnt = 0; + if ((cnt == 0) || + (fail_time && (difftime(time(NULL), fail_time) < 10))) { + abs_time.tv_sec = time(NULL) + 10; + abs_time.tv_nsec = 0; + rc = pthread_cond_timedwait(&agent_cond, &agent_lock, + &abs_time); + slurm_mutex_unlock(&agent_lock); + continue; + } else if ((cnt > 0) && ((cnt % 50) == 0)) + info("gold: agent queue size %u", cnt); + /* Leave item on the queue until processing complete */ + if (agent_list) + buffer = (Buf) list_peek(agent_list); + else + buffer = NULL; + slurm_mutex_unlock(&agent_lock); + if (buffer == NULL) + continue; + + /* NOTE: agent_lock is clear here, so we can add more + * requests to the queue while waiting for this RPC to + * complete. */ + rc = _process_msg(buffer); + if (rc != SLURM_SUCCESS) { + if (agent_shutdown) + break; + error("gold: Failure sending message"); + } + + slurm_mutex_lock(&agent_lock); + if (agent_list && (rc != EAGAIN)) { + buffer = (Buf) list_dequeue(agent_list); + free_buf(buffer); + fail_time = 0; + } else { + fail_time = time(NULL); + } + slurm_mutex_unlock(&agent_lock); + } + + slurm_mutex_lock(&agent_lock); + _save_gold_state(); + if (agent_list) { + list_destroy(agent_list); + agent_list = NULL; + } + slurm_mutex_unlock(&agent_lock); + return NULL; +} + +static int _process_msg(Buf buffer) +{ + int rc; + uint16_t msg_type; + uint32_t msg_size; + + /* We save the full buffer size in case the RPC fails + * and we need to save state for later recovery. */ + msg_size = get_buf_offset(buffer); + set_buf_offset(buffer, 0); + safe_unpack16(&msg_type, buffer); +#if _DEBUG + info("gold agent: processing msg_type %u", msg_type); +#endif + switch (msg_type) { + case GOLD_MSG_CLUSTER_PROCS: + rc = agent_cluster_procs(buffer); + break; + case GOLD_MSG_JOB_COMPLETE: + rc = agent_job_complete(buffer); + break; + case GOLD_MSG_JOB_START: + rc = agent_job_start(buffer); + break; + case GOLD_MSG_NODE_DOWN: + rc = agent_node_down(buffer); + break; + case GOLD_MSG_NODE_UP: + rc = agent_node_up(buffer); + break; + case GOLD_MSG_STEP_START: + rc = agent_step_start(buffer); + break; + default: + error("gold: Invalid send message type %u", msg_type); + rc = SLURM_ERROR; /* discard entry and continue */ + } + set_buf_offset(buffer, msg_size); /* restore buffer size */ + return rc; + +unpack_error: + /* If the message format is bad return SLURM_SUCCESS to get + * it off of the queue since we can't work with it anyway */ + error("gold agent: message unpack error"); + return SLURM_ERROR; +} + +static void _save_gold_state(void) +{ + char *gold_fname; + Buf buffer; + int fd, rc, wrote = 0; + + gold_fname = slurm_get_state_save_location(); + xstrcat(gold_fname, "/gold.messages"); + fd = open(gold_fname, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0) { + error("gold: Creating state save file %s", gold_fname); + } else if (agent_list) { + while ((buffer = list_dequeue(agent_list))) { + rc = _save_gold_rec(fd, buffer); + free_buf(buffer); + if (rc != SLURM_SUCCESS) + break; + wrote++; + } + } + if (fd >= 0) { + verbose("gold: saved %d pending RPCs", wrote); + (void) close(fd); + } + xfree(gold_fname); +} + +static void _load_gold_state(void) +{ + char *gold_fname; + Buf buffer; + int fd, recovered = 0; + + gold_fname = slurm_get_state_save_location(); + xstrcat(gold_fname, "/gold.messages"); + fd = open(gold_fname, O_RDONLY); + if (fd < 0) { + error("gold: 
Opening state save file %s", gold_fname); + } else { + while (1) { + buffer = _load_gold_rec(fd); + if (buffer == NULL) + break; + if (list_enqueue(agent_list, buffer) == NULL) + fatal("gold: list_enqueue, no memory"); + recovered++; + } + } + if (fd >= 0) { + verbose("gold: recovered %d pending RPCs", recovered); + (void) close(fd); + (void) unlink(gold_fname); /* clear save state */ + } + xfree(gold_fname); +} + +static int _save_gold_rec(int fd, Buf buffer) +{ + ssize_t size, wrote; + uint32_t msg_size = get_buf_offset(buffer); + uint32_t magic = GOLD_MAGIC; + char *msg = get_buf_data(buffer); + + size = sizeof(msg_size); + wrote = write(fd, &msg_size, size); + if (wrote != size) { + error("gold: state save error: %m"); + return SLURM_ERROR; + } + + wrote = 0; + while (wrote < msg_size) { + wrote = write(fd, msg, msg_size); + if (wrote > 0) { + msg += wrote; + msg_size -= wrote; + } else if ((wrote == -1) && (errno == EINTR)) + continue; + else { + error("gold: state save error: %m"); + return SLURM_ERROR; + } + } + + size = sizeof(magic); + wrote = write(fd, &magic, size); + if (wrote != size) { + error("gold: state save error: %m"); + return SLURM_ERROR; + } + + return SLURM_SUCCESS; +} + +static Buf _load_gold_rec(int fd) +{ + ssize_t size, rd_size; + uint32_t msg_size, magic; + char *msg; + Buf buffer; + + size = sizeof(msg_size); + rd_size = read(fd, &msg_size, size); + if (rd_size == 0) + return (Buf) NULL; + if (rd_size != size) { + error("gold: state recover error: %m"); + return (Buf) NULL; + } + if (msg_size > MAX_GOLD_MSG_LEN) { + error("gold: state recover error, msg_size=%u", msg_size); + return (Buf) NULL; + } + + buffer = init_buf((int) msg_size); + if (buffer == NULL) + fatal("gold: create_buf malloc failure"); + set_buf_offset(buffer, msg_size); + msg = get_buf_data(buffer); + size = msg_size; + while (size) { + rd_size = read(fd, msg, size); + if (rd_size > 0) { + msg += rd_size; + size -= rd_size; + } else if ((rd_size == -1) && (errno == EINTR)) + continue; + else { + error("gold: state recover error: %m"); + free_buf(buffer); + return (Buf) NULL; + } + } + + size = sizeof(magic); + rd_size = read(fd, &magic, size); + if ((rd_size != size) || (magic != GOLD_MAGIC)) { + error("gold: state recover error"); + free_buf(buffer); + return (Buf) NULL; + } + + return buffer; +} + +static void _sig_handler(int signal) +{ +} + +/****************************************************************************\ + * Free data structures +\****************************************************************************/ +void inline gold_agent_free_cluster_procs_msg(gold_cluster_procs_msg_t *msg) +{ + xfree(msg); +} + +void inline gold_agent_free_job_info_msg(gold_job_info_msg_t *msg) +{ + if (msg) { + xfree(msg->account); + xfree(msg->name); + xfree(msg->nodes); + xfree(msg->partition); + xfree(msg); + } +} + +void inline gold_agent_free_node_down_msg(gold_node_down_msg_t *msg) +{ + if (msg) { + xfree(msg->hostlist); + xfree(msg->reason); + xfree(msg); + } +} + +void inline gold_agent_free_node_up_msg(gold_node_up_msg_t *msg) +{ + if (msg) { + xfree(msg->hostlist); + xfree(msg); + } +} + +/****************************************************************************\ + * Pack and unpack data structures +\****************************************************************************/ +void inline +gold_agent_pack_cluster_procs_msg(gold_cluster_procs_msg_t *msg, Buf buffer) +{ + pack32(msg->proc_count, buffer); + pack_time(msg->event_time, buffer); +} +int inline 
+gold_agent_unpack_cluster_procs_msg(gold_cluster_procs_msg_t **msg, Buf buffer) +{ + gold_cluster_procs_msg_t *msg_ptr; + + msg_ptr = xmalloc(sizeof(gold_cluster_procs_msg_t)); + *msg = msg_ptr; + safe_unpack32(&msg_ptr->proc_count, buffer); + safe_unpack_time(&msg_ptr->event_time, buffer); + return SLURM_SUCCESS; + +unpack_error: + xfree(msg_ptr); + *msg = NULL; + return SLURM_ERROR; +} + +void inline +gold_agent_pack_job_info_msg(gold_job_info_msg_t *msg, Buf buffer) +{ + packstr(msg->account, buffer); + pack_time(msg->begin_time, buffer); + pack_time(msg->end_time, buffer); + pack32(msg->exit_code, buffer); + pack32(msg->job_id, buffer); + pack16(msg->job_state, buffer); + packstr(msg->name, buffer); + packstr(msg->nodes, buffer); + packstr(msg->partition, buffer); + pack_time(msg->start_time, buffer); + pack_time(msg->submit_time, buffer); + pack32(msg->total_procs, buffer); + pack32(msg->user_id, buffer); +} + +int inline +gold_agent_unpack_job_info_msg(gold_job_info_msg_t **msg, Buf buffer) +{ + uint16_t uint16_tmp; + gold_job_info_msg_t *msg_ptr = xmalloc(sizeof(gold_job_info_msg_t)); + *msg = msg_ptr; + safe_unpackstr_xmalloc(&msg_ptr->account, &uint16_tmp, buffer); + safe_unpack_time(&msg_ptr->begin_time, buffer); + safe_unpack_time(&msg_ptr->end_time, buffer); + safe_unpack32(&msg_ptr->exit_code, buffer); + safe_unpack32(&msg_ptr->job_id, buffer); + safe_unpack16(&msg_ptr->job_state, buffer); + safe_unpackstr_xmalloc(&msg_ptr->name, &uint16_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->nodes, &uint16_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->partition, &uint16_tmp, buffer); + safe_unpack_time(&msg_ptr->start_time, buffer); + safe_unpack_time(&msg_ptr->submit_time, buffer); + safe_unpack32(&msg_ptr->total_procs, buffer); + safe_unpack32(&msg_ptr->user_id, buffer); + return SLURM_SUCCESS; + +unpack_error: + xfree(msg_ptr->account); + xfree(msg_ptr->name); + xfree(msg_ptr->nodes); + xfree(msg_ptr->partition); + xfree(msg_ptr); + *msg = NULL; + return SLURM_ERROR; +} + +void inline +gold_agent_pack_node_down_msg(gold_node_down_msg_t *msg, Buf buffer) +{ + pack16(msg->cpus, buffer); + pack_time(msg->event_time, buffer); + packstr(msg->hostlist, buffer); + packstr(msg->reason, buffer); +} + +int inline +gold_agent_unpack_node_down_msg(gold_node_down_msg_t **msg, Buf buffer) +{ + gold_node_down_msg_t *msg_ptr; + uint16_t uint16_tmp; + + msg_ptr = xmalloc(sizeof(gold_node_down_msg_t)); + *msg = msg_ptr; + safe_unpack16(&msg_ptr->cpus, buffer); + safe_unpack_time(&msg_ptr->event_time, buffer); + safe_unpackstr_xmalloc(&msg_ptr->hostlist, &uint16_tmp, buffer); + safe_unpackstr_xmalloc(&msg_ptr->reason, &uint16_tmp, buffer); + return SLURM_SUCCESS; + +unpack_error: + xfree(msg_ptr->hostlist); + xfree(msg_ptr->reason); + xfree(msg_ptr); + *msg = NULL; + return SLURM_ERROR; +} + +void inline +gold_agent_pack_node_up_msg(gold_node_up_msg_t *msg, Buf buffer) +{ + pack_time(msg->event_time, buffer); + packstr(msg->hostlist, buffer); +} + +int inline +gold_agent_unpack_node_up_msg(gold_node_up_msg_t **msg, Buf buffer) +{ + gold_node_up_msg_t *msg_ptr; + uint16_t uint16_tmp; + + msg_ptr = xmalloc(sizeof(gold_node_up_msg_t)); + *msg = msg_ptr; + safe_unpack_time(&msg_ptr->event_time, buffer); + safe_unpackstr_xmalloc(&msg_ptr->hostlist, &uint16_tmp, buffer); + return SLURM_SUCCESS; + +unpack_error: + xfree(msg_ptr->hostlist); + xfree(msg_ptr); + *msg = NULL; + return SLURM_ERROR; +} diff --git a/src/plugins/jobacct/gold/agent.h b/src/plugins/jobacct/gold/agent.h new file mode 100644 
index 000000000..13df74826 --- /dev/null +++ b/src/plugins/jobacct/gold/agent.h @@ -0,0 +1,170 @@ +/****************************************************************************\ + * agent.h - Definitions used to queue and process pending Gold requests + * Largely copied from src/common/slurmdbd_defs.h in Slurm v1.3 + ***************************************************************************** + * Copyright (C) 2008 Lawrence Livermore National Security. + * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). + * Written by Morris Jette <jette1@llnl.gov> + * UCRL-CODE-226842. + * + * This file is part of SLURM, a resource management program. + * For details, see <http://www.llnl.gov/linux/slurm/>. + * + * SLURM is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * In addition, as a special exception, the copyright holders give permission + * to link the code of portions of this program with the OpenSSL library under + * certain conditions as described in each individual source file, and + * distribute linked combinations including the two. You must obey the GNU + * General Public License in all respects for all of the code used other than + * OpenSSL. If you modify file(s) with this exception, you may extend this + * exception to your version of the file(s), but you are not obligated to do + * so. If you do not wish to do so, delete this exception statement from your + * version. If you delete this exception statement from all source files in + * the program, then also delete it here. + * + * SLURM is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with SLURM; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+\*****************************************************************************/ + +#ifndef _GOLD_AGENT_H +#define _GOLD_AGENT_H + +#if HAVE_CONFIG_H +# include "config.h" +# if HAVE_INTTYPES_H +# include <inttypes.h> +# else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +# endif /* HAVE_INTTYPES_H */ +#else /* !HAVE_CONFIG_H */ +# include <inttypes.h> +#endif /* HAVE_CONFIG_H */ + +#include "src/common/pack.h" + +/* Increment SLURM_DBD_VERSION if any of the RPCs change */ +#define SLURM_DBD_VERSION 01 + +/* SLURM DBD message types */ +typedef enum { + GOLD_MSG_INIT = 1400, /* Connection initialization */ + GOLD_MSG_CLUSTER_PROCS, /* Record tota processors on cluster */ + GOLD_MSG_JOB_COMPLETE, /* Record job completion */ + GOLD_MSG_JOB_START, /* Record job starting */ + GOLD_MSG_NODE_DOWN, /* Record node state going DOWN */ + GOLD_MSG_NODE_UP, /* Record node state coming UP */ + GOLD_MSG_STEP_START /* Record step starting */ +} slurm_gold_msg_type_t; + +/*****************************************************************************\ + * Slurm DBD protocol data structures +\*****************************************************************************/ + +typedef struct gold_agent_msg { + uint16_t msg_type; /* see gold_agent_msg_type_t above */ + void * data; /* pointer to a message type below */ +} gold_agent_msg_t; + +typedef struct gold_cluster_procs_msg { + uint32_t proc_count; /* total processor count */ + time_t event_time; /* time of transition */ +} gold_cluster_procs_msg_t; + +typedef struct gold_job_info_msg { + char * account; /* bank account for job */ + time_t begin_time; /* time job becomes eligible to run */ + time_t end_time; /* job termintation time */ + uint32_t exit_code; /* job exit code or signal */ + uint32_t job_id; /* job ID */ + uint16_t job_state; /* job state */ + char * name; /* job name */ + char * nodes; /* hosts allocated to the job */ + char * partition; /* job's partition */ + time_t start_time; /* job start time */ + time_t submit_time; /* job submit time */ + uint32_t total_procs; /* count of allocated processors */ + uint32_t user_id; /* owner's UID */ +} gold_job_info_msg_t; + +typedef struct gold_node_down_msg { + uint16_t cpus; /* processors on the node */ + time_t event_time; /* time of transition */ + char *hostlist; /* name of hosts */ + char *reason; /* explanation for the node's state */ +} gold_node_down_msg_t; + + +typedef struct gold_node_up_msg { + time_t event_time; /* time of transition */ + char *hostlist; /* name of hosts */ +} gold_node_up_msg_t; + +/*****************************************************************************\ + * Slurm DBD message processing functions +\*****************************************************************************/ + +/* Initiated a Gold message agent. Recover any saved RPCs. */ +extern int gold_agent_init(void); + +/* Terminate a Gold message agent. Save any pending RPCs. */ +extern int gold_agent_fini(void); + +/* Send an RPC to the Gold. Do not wait for the reply. The RPC + * will be queued and processed later if Gold is not responding. + * Returns SLURM_SUCCESS or an error code */ +extern int gold_agent_xmit(gold_agent_msg_t *req); + +/*****************************************************************************\ + * Functions for processing the Gold requests, located in jobacct_gold.c +\*****************************************************************************/ +/* For all functions below + * RET SLURM_SUCCESS on success + * SLURM_ERROR on non-recoverable error (e.g. 
invalid account ID) + * EAGAIN on recoverable error (e.g. Gold not responding) */ +extern int agent_cluster_procs(Buf buffer); +extern int agent_job_start(Buf buffer); +extern int agent_job_complete(Buf buffer); +extern int agent_step_start(Buf buffer); +extern int agent_node_down(Buf buffer); +extern int agent_node_up(Buf buffer); + +/*****************************************************************************\ + * Free various Gold message structures +\*****************************************************************************/ +void inline gold_agent_free_cluster_procs_msg(gold_cluster_procs_msg_t *msg); +void inline gold_agent_free_job_info_msg(gold_job_info_msg_t *msg); +void inline gold_agent_free_node_down_msg(gold_node_down_msg_t *msg); +void inline gold_agent_free_node_up_msg(gold_node_up_msg_t *msg); + +/*****************************************************************************\ + * Pack various Gold message structures into a buffer +\*****************************************************************************/ +void inline gold_agent_pack_cluster_procs_msg(gold_cluster_procs_msg_t *msg, + Buf buffer); +void inline gold_agent_pack_job_info_msg(gold_job_info_msg_t *msg, Buf buffer); +void inline gold_agent_pack_node_down_msg(gold_node_down_msg_t *msg, Buf buffer); +void inline gold_agent_pack_node_up_msg(gold_node_up_msg_t *msg, Buf buffer); + +/*****************************************************************************\ + * Unpack various Gold message structures from a buffer +\*****************************************************************************/ +int inline gold_agent_unpack_cluster_procs_msg(gold_cluster_procs_msg_t **msg, + Buf buffer); +int inline gold_agent_unpack_job_info_msg(gold_job_info_msg_t **msg, Buf buffer); +int inline gold_agent_unpack_node_down_msg(gold_node_down_msg_t **msg, + Buf buffer); +int inline gold_agent_unpack_node_up_msg(gold_node_up_msg_t **msg, Buf buffer); + +#endif /* !_GOLD_AGENT_H */ diff --git a/src/plugins/jobacct/gold/jobacct_gold.c b/src/plugins/jobacct/gold/jobacct_gold.c index 1a0656435..2b58e0373 100644 --- a/src/plugins/jobacct/gold/jobacct_gold.c +++ b/src/plugins/jobacct/gold/jobacct_gold.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * jobacct_gold.c - jobacct interface to gold. - * - * $Id: jobacct_gold.c 13077 2008-01-23 22:31:44Z da $ ***************************************************************************** * Copyright (C) 2004-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Danny Auble <da@llnl.gov> * @@ -36,22 +35,21 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
\*****************************************************************************/ #include "gold_interface.h" +#include "agent.h" -#include <stdlib.h> #include <ctype.h> +#include <stdlib.h> #include <sys/stat.h> -#include "src/common/xmalloc.h" #include "src/common/list.h" -#include "src/common/xstring.h" +#include "src/common/parse_time.h" +#include "src/common/slurm_jobacct.h" +#include "src/common/slurm_protocol_api.h" #include "src/common/uid.h" -#include <src/common/parse_time.h> - +#include "src/common/xmalloc.h" +#include "src/common/xstring.h" #include "src/slurmctld/slurmctld.h" #include "src/slurmd/slurmd/slurmd.h" -#include "src/common/slurm_jobacct.h" -#include "src/common/slurm_protocol_api.h" - typedef struct { char *user; @@ -60,6 +58,8 @@ typedef struct { char *gold_id; } gold_account_t; +static int _add_edit_job(gold_job_info_msg_t *job_ptr, gold_object_t action); + /* * These variables are required by the generic plugin interface. If they * are not found in the plugin, the plugin loader will ignore it. @@ -135,7 +135,7 @@ static int _check_for_job(uint32_t jobid, time_t submit) gold_request_add_condition(gold_request, "JobId", tmp_buff, GOLD_OPERATOR_NONE); - snprintf(tmp_buff, sizeof(tmp_buff), "%u", (int)submit); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", (uint32_t)submit); gold_request_add_condition(gold_request, "SubmitTime", tmp_buff, GOLD_OPERATOR_NONE); @@ -154,6 +154,12 @@ static int _check_for_job(uint32_t jobid, time_t submit) return rc; } +/* + * Get an account ID for some user/project/machine + * RET the account ID OR + * NULL on Gold communcation failure OR + * "0" if there is no valid account + */ static char *_get_account_id(char *user, char *project, char *machine) { gold_request_t *gold_request = NULL; @@ -224,143 +230,6 @@ static char *_get_account_id(char *user, char *project, char *machine) return gold_account_id; } -static int _add_edit_job(struct job_record *job_ptr, gold_object_t action) -{ - gold_request_t *gold_request = create_gold_request(GOLD_OBJECT_JOB, - action); - gold_response_t *gold_response = NULL; - char tmp_buff[50]; - int rc = SLURM_ERROR; - char *gold_account_id = NULL; - char *user = uid_to_string((uid_t)job_ptr->user_id); - char *jname = NULL; - int tmp = 0, i = 0; - char *account = NULL; - char *nodes = "(null)"; - - if(!gold_request) - return rc; - - if ((tmp = strlen(job_ptr->name))) { - jname = xmalloc(++tmp); - for (i=0; i<tmp; i++) { - if (isspace(job_ptr->name[i])) - jname[i]='_'; - else - jname[i]=job_ptr->name[i]; - } - } else - jname = xstrdup("allocation"); - - if (job_ptr->account && job_ptr->account[0]) - account = job_ptr->account; - - if (job_ptr->nodes && job_ptr->nodes[0]) - nodes = job_ptr->nodes; - - -//info("total procs is %d", job_ptr->details->total_procs); - if(action == GOLD_ACTION_CREATE) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_id); - gold_request_add_assignment(gold_request, "JobId", tmp_buff); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->details->submit_time); - gold_request_add_assignment(gold_request, "SubmitTime", - tmp_buff); - - gold_account_id = _get_account_id(user, account, - cluster_name); - - gold_request_add_assignment(gold_request, "GoldAccountId", - gold_account_id); - xfree(gold_account_id); - - } else if (action == GOLD_ACTION_MODIFY) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_id); - gold_request_add_condition(gold_request, "JobId", tmp_buff, - GOLD_OPERATOR_NONE); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - 
(int)job_ptr->details->submit_time); - gold_request_add_condition(gold_request, "SubmitTime", - tmp_buff, - GOLD_OPERATOR_NONE); - } else { - destroy_gold_request(gold_request); - error("_add_edit_job: bad action given %d", action); - return rc; - } - - gold_request_add_assignment(gold_request, "Partition", - job_ptr->partition); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - job_ptr->details->total_procs); - gold_request_add_assignment(gold_request, "RequestedCPUCount", - tmp_buff); - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - job_ptr->details->total_procs); - gold_request_add_assignment(gold_request, "AllocatedCPUCount", - tmp_buff); - - gold_request_add_assignment(gold_request, "NodeList", - nodes); - - gold_request_add_assignment(gold_request, "JobName", - jname); - xfree(jname); - - if(job_ptr->job_state != JOB_RUNNING) { - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->end_time); - gold_request_add_assignment(gold_request, "EndTime", - tmp_buff); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->exit_code); - gold_request_add_assignment(gold_request, "ExitCode", - tmp_buff); - } -/* gold_request_add_assignment(gold_request, "ReservedCPUSeconds", */ -/* ); */ - - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->details->begin_time); - gold_request_add_assignment(gold_request, "EligibleTime", - tmp_buff); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - (int)job_ptr->start_time); - gold_request_add_assignment(gold_request, "StartTime", - tmp_buff); - - snprintf(tmp_buff, sizeof(tmp_buff), "%u", - job_ptr->job_state & (~JOB_COMPLETING)); - gold_request_add_assignment(gold_request, "State", - tmp_buff); - - gold_response = get_gold_response(gold_request); - destroy_gold_request(gold_request); - - if(!gold_response) { - error("_add_edit_job: no response received"); - return rc; - } - - if(!gold_response->rc) - rc = SLURM_SUCCESS; - else { - error("gold_response has non-zero rc(%d): %s", - gold_response->rc, - gold_response->message); - } - destroy_gold_response(gold_response); - - return rc; -} - /* * init() is called when the plugin is loaded, before any other functions * are called. Put global initialization here. @@ -504,7 +373,7 @@ int jobacct_p_init_slurmctld(char *gold_info) cluster_name, keyfile, host, port); init_gold(cluster_name, keyfile, host, port); - + gold_agent_init(); xfree(keyfile); xfree(host); @@ -513,6 +382,7 @@ int jobacct_p_init_slurmctld(char *gold_info) int jobacct_p_fini_slurmctld() { + gold_agent_fini(); xfree(cluster_name); if(gold_account_list) list_destroy(gold_account_list); @@ -522,44 +392,84 @@ int jobacct_p_fini_slurmctld() int jobacct_p_job_start_slurmctld(struct job_record *job_ptr) { - gold_object_t action = GOLD_ACTION_CREATE; - - if(_check_for_job(job_ptr->job_id, job_ptr->details->submit_time)) { - error("It looks like this job is already in GOLD. 
" - "This shouldn't happen, we are going to overwrite " - "old info."); - action = GOLD_ACTION_MODIFY; - } + gold_agent_msg_t msg; + gold_job_info_msg_t req; + + req.account = job_ptr->account; + req.begin_time = job_ptr->details->begin_time; + req.end_time = job_ptr->end_time; + req.exit_code = job_ptr->exit_code; + req.job_id = job_ptr->job_id; + req.job_state = job_ptr->job_state; + req.name = job_ptr->name; + req.nodes = job_ptr->nodes; + req.partition = job_ptr->partition; + req.start_time = job_ptr->start_time; + req.submit_time = job_ptr->details->submit_time; + req.total_procs = job_ptr->details->total_procs; + req.user_id = job_ptr->user_id; + msg.msg_type = GOLD_MSG_JOB_START; + msg.data = &req; + + if (gold_agent_xmit(&msg) < 0) + return SLURM_ERROR; - return _add_edit_job(job_ptr, action); + return SLURM_SUCCESS; } int jobacct_p_job_complete_slurmctld(struct job_record *job_ptr) { - gold_object_t action = GOLD_ACTION_MODIFY; - - if(!_check_for_job(job_ptr->job_id, job_ptr->details->submit_time)) { - error("Couldn't find this job entry. " - "This shouldn't happen, we are going to create one."); - action = GOLD_ACTION_CREATE; - } + gold_agent_msg_t msg; + gold_job_info_msg_t req; + + req.account = job_ptr->account; + req.begin_time = job_ptr->details->begin_time; + req.end_time = job_ptr->end_time; + req.exit_code = job_ptr->exit_code; + req.job_id = job_ptr->job_id; + req.job_state = job_ptr->job_state; + req.name = job_ptr->name; + req.nodes = job_ptr->nodes; + req.partition = job_ptr->partition; + req.start_time = job_ptr->start_time; + req.submit_time = job_ptr->details->submit_time; + req.total_procs = job_ptr->details->total_procs; + req.user_id = job_ptr->user_id; + msg.msg_type = GOLD_MSG_JOB_COMPLETE; + msg.data = &req; + + if (gold_agent_xmit(&msg) < 0) + return SLURM_ERROR; - return _add_edit_job(job_ptr, action); + return SLURM_SUCCESS; } int jobacct_p_step_start_slurmctld(struct step_record *step) { - gold_object_t action = GOLD_ACTION_MODIFY; - - if(!_check_for_job(step->job_ptr->job_id, - step->job_ptr->details->submit_time)) { - error("Couldn't find this job entry. 
" - "This shouldn't happen, we are going to create one."); - action = GOLD_ACTION_CREATE; - } - - return _add_edit_job(step->job_ptr, action); + gold_agent_msg_t msg; + gold_job_info_msg_t req; + struct job_record *job_ptr = step->job_ptr; + + req.account = job_ptr->account; + req.begin_time = job_ptr->details->begin_time; + req.end_time = job_ptr->end_time; + req.exit_code = job_ptr->exit_code; + req.job_id = job_ptr->job_id; + req.job_state = job_ptr->job_state; + req.name = job_ptr->name; + req.nodes = job_ptr->nodes; + req.partition = job_ptr->partition; + req.start_time = job_ptr->start_time; + req.submit_time = job_ptr->details->submit_time; + req.total_procs = job_ptr->details->total_procs; + req.user_id = job_ptr->user_id; + msg.msg_type = GOLD_MSG_STEP_START; + msg.data = &req; + + if (gold_agent_xmit(&msg) < 0) + return SLURM_ERROR; + return SLURM_SUCCESS; } int jobacct_p_step_complete_slurmctld(struct step_record *step) @@ -620,174 +530,488 @@ void jobacct_p_resume_poll() extern int jobacct_p_node_down(struct node_record *node_ptr, time_t event_time, char *reason) { + gold_agent_msg_t msg; + gold_node_down_msg_t req; uint16_t cpus; - int rc = SLURM_ERROR; - gold_request_t *gold_request = NULL; - gold_response_t *gold_response = NULL; - char tmp_buff[50]; if (slurmctld_conf.fast_schedule) cpus = node_ptr->config_ptr->cpus; else cpus = node_ptr->cpus; + if (reason == NULL) + reason = node_ptr->reason; +#if _DEBUG +{ + char tmp_buff[50]; + slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); + info("jobacct_p_node_down: %s at %s with %u cpus due to %s", + node_ptr->name, tmp_buff, cpus, reason); +} +#endif + req.cpus = cpus; + req.event_time = event_time; + req.hostlist = node_ptr->name; + req.reason = reason; + msg.msg_type = GOLD_MSG_NODE_DOWN; + msg.data = &req; + + if (gold_agent_xmit(&msg) < 0) + return SLURM_ERROR; + + return SLURM_SUCCESS; +} + +extern int jobacct_p_node_up(struct node_record *node_ptr, time_t event_time) +{ + gold_agent_msg_t msg; + gold_node_up_msg_t req; #if _DEBUG +{ + char tmp_buff[50]; slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); - info("Node_acct_down: %s at %s with %u cpus due to %s", - node_ptr->name, tmp_buff, cpus, node_ptr->reason); + info("jobacct_p_node_up: %s at %s", node_ptr->name, tmp_buff); +} #endif - /* If the node was already down end that record since the - * reason will most likely be different - */ - gold_request = create_gold_request(GOLD_OBJECT_EVENT, - GOLD_ACTION_MODIFY); - if(!gold_request) - return rc; + req.hostlist = node_ptr->name; + req.event_time = event_time; + msg.msg_type = GOLD_MSG_NODE_UP; + msg.data = &req; + + if (gold_agent_xmit(&msg) < 0) + return SLURM_ERROR; + + return SLURM_SUCCESS; +} + +extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) +{ + static uint32_t last_procs = 0; + gold_agent_msg_t msg; + gold_cluster_procs_msg_t req; + +#if _DEBUG +{ + char tmp_buff[50]; + slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); + info("jobacct_p_cluster_procs: %s has %u total CPUs at %s", + cluster_name, procs, tmp_buff); +} +#endif + if (procs == last_procs) { + debug3("jobacct_p_cluster_procs: no change in proc count"); + return SLURM_SUCCESS; + } + last_procs = procs; + + req.proc_count = procs; + req.event_time = event_time; + msg.msg_type = GOLD_MSG_CLUSTER_PROCS; + msg.data = &req; + + if (gold_agent_xmit(&msg) < 0) + return SLURM_ERROR; + + return SLURM_SUCCESS; +} + +/* + * Functions that process queued Gold requests + */ +extern int agent_job_start(Buf 
buffer) +{ + int rc; + gold_job_info_msg_t *job_info_msg; + gold_object_t action; + + if (gold_agent_unpack_job_info_msg(&job_info_msg, buffer) != + SLURM_SUCCESS) { + error("Failed to unpack GOLD_MSG_JOB_START message"); + return SLURM_ERROR; + } + + if (_check_for_job(job_info_msg->job_id, + job_info_msg->submit_time)) { + error("Job %u is already in GOLD, overwrite old info", + job_info_msg->job_id); + action = GOLD_ACTION_MODIFY; + } else { + action = GOLD_ACTION_CREATE; + } + + rc = _add_edit_job(job_info_msg, action); + gold_agent_free_job_info_msg(job_info_msg); + return rc; +} + +extern int agent_job_complete(Buf buffer) +{ + int rc; + gold_job_info_msg_t *job_info_msg; + gold_object_t action; + + if (gold_agent_unpack_job_info_msg(&job_info_msg, buffer) != + SLURM_SUCCESS) { + error("Failed to unpack GOLD_MSG_JOB_COMPLETE message"); + return SLURM_ERROR; + } + + if (_check_for_job(job_info_msg->job_id, + job_info_msg->submit_time)) { + action = GOLD_ACTION_MODIFY; + } else { + error("Job %u is missing from GOLD, creating new record", + job_info_msg->job_id); + action = GOLD_ACTION_CREATE; + } + + rc = _add_edit_job(job_info_msg, action); + gold_agent_free_job_info_msg(job_info_msg); + return rc; +} + +extern int agent_step_start(Buf buffer) +{ + int rc; + gold_job_info_msg_t *job_info_msg; + gold_object_t action; + + if (gold_agent_unpack_job_info_msg(&job_info_msg, buffer) != + SLURM_SUCCESS) { + error("Failed to unpack GOLD_MSG_STEP_START message"); + return SLURM_ERROR; + } + + if (_check_for_job(job_info_msg->job_id, + job_info_msg->submit_time)) { + action = GOLD_ACTION_MODIFY; + } else { + error("Job %u is missing from GOLD, creating new record", + job_info_msg->job_id); + action = GOLD_ACTION_CREATE; + } + + rc = _add_edit_job(job_info_msg, action); + gold_agent_free_job_info_msg(job_info_msg); + return rc; +} + +/* + * Update a job entry + * RET SLURM_SUCCESS on success + * SLURM_ERROR on non-recoverable error (e.g. invalid account ID) + * EAGAIN on recoverable error (e.g. 
Gold not responding) + */ +static int _add_edit_job(gold_job_info_msg_t *job_ptr, gold_object_t action) +{ + gold_request_t *gold_request = create_gold_request(GOLD_OBJECT_JOB, + action); + gold_response_t *gold_response = NULL; + char tmp_buff[50]; + int rc = SLURM_ERROR; + char *gold_account_id = NULL; + char *user = uid_to_string((uid_t)job_ptr->user_id); + char *jname = NULL; + int tmp = 0, i = 0; + char *account = NULL; + char *nodes = "(null)"; + + if (!gold_request) + return SLURM_ERROR; + + if (action == GOLD_ACTION_CREATE) { + snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_id); + gold_request_add_assignment(gold_request, "JobId", tmp_buff); + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)job_ptr->submit_time); + gold_request_add_assignment(gold_request, "SubmitTime", + tmp_buff); + + gold_account_id = _get_account_id(user, account, + cluster_name); + if ((gold_account_id == NULL) || + ((gold_account_id[0] == '0') && (gold_account_id[1] == '\0'))) { + destroy_gold_request(gold_request); + if (gold_account_id) { + xfree(gold_account_id); + return SLURM_ERROR; /* Invalid account */ + } + return EAGAIN; /* Gold not responding */ + } + gold_request_add_assignment(gold_request, "GoldAccountId", + gold_account_id); + xfree(gold_account_id); + + } else if (action == GOLD_ACTION_MODIFY) { + snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->job_id); + gold_request_add_condition(gold_request, "JobId", tmp_buff, + GOLD_OPERATOR_NONE); + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)job_ptr->submit_time); + gold_request_add_condition(gold_request, "SubmitTime", + tmp_buff, + GOLD_OPERATOR_NONE); + } else { + destroy_gold_request(gold_request); + error("_add_edit_job: bad action given %d", + action); + return SLURM_ERROR; + } + + if ((tmp = strlen(job_ptr->name))) { + jname = xmalloc(++tmp); + for (i=0; i<tmp; i++) { + if (isspace(job_ptr->name[i])) + jname[i]='_'; + else + jname[i]=job_ptr->name[i]; + } + } else + jname = xstrdup("allocation"); + gold_request_add_assignment(gold_request, "JobName", jname); + xfree(jname); + + if (job_ptr->account && job_ptr->account[0]) + account = job_ptr->account; - gold_request_add_condition(gold_request, "Machine", cluster_name, - GOLD_OPERATOR_NONE); - gold_request_add_condition(gold_request, "EndTime", "0", - GOLD_OPERATOR_NONE); - gold_request_add_condition(gold_request, "Name", node_ptr->name, - GOLD_OPERATOR_NONE); + if (job_ptr->nodes && job_ptr->nodes[0]) + nodes = job_ptr->nodes; + + gold_request_add_assignment(gold_request, "Partition", + job_ptr->partition); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->total_procs); + gold_request_add_assignment(gold_request, "RequestedCPUCount", + tmp_buff); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", job_ptr->total_procs); + gold_request_add_assignment(gold_request, "AllocatedCPUCount", + tmp_buff); + gold_request_add_assignment(gold_request, "NodeList", nodes); + + + if (job_ptr->job_state != JOB_RUNNING) { + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)job_ptr->end_time); + gold_request_add_assignment(gold_request, "EndTime", + tmp_buff); + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + job_ptr->exit_code); + gold_request_add_assignment(gold_request, "ExitCode", + tmp_buff); + } + + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)job_ptr->begin_time); + gold_request_add_assignment(gold_request, "EligibleTime", tmp_buff); + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)job_ptr->start_time); + gold_request_add_assignment(gold_request, 
"StartTime", tmp_buff); + + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + job_ptr->job_state & (~JOB_COMPLETING)); + gold_request_add_assignment(gold_request, "State", tmp_buff); - snprintf(tmp_buff, sizeof(tmp_buff), "%d", ((int)event_time - 1)); - gold_request_add_assignment(gold_request, "EndTime", tmp_buff); - gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); - if(!gold_response) { - error("jobacct_p_cluster_procs: no response received"); - return rc; + if (!gold_response) { + error("_add_edit_job: no response received"); + return EAGAIN; } - if(gold_response->rc) { + if (!gold_response->rc) + rc = SLURM_SUCCESS; + else { error("gold_response has non-zero rc(%d): %s", gold_response->rc, gold_response->message); - destroy_gold_response(gold_response); - return rc; + rc = SLURM_ERROR; } destroy_gold_response(gold_response); - /* now add the new one */ + return rc; +} + +extern int agent_node_up(Buf buffer) +{ + int rc = SLURM_ERROR; + gold_request_t *gold_request = NULL; + gold_response_t *gold_response = NULL; + char tmp_buff[50]; + gold_node_up_msg_t *node_up_msg; + time_t event_time; + + if (gold_agent_unpack_node_up_msg(&node_up_msg, buffer) != + SLURM_SUCCESS) { + error("Failed to unpack GOLD_MSG_NODE_UP message"); + return SLURM_ERROR; + } + gold_request = create_gold_request(GOLD_OBJECT_EVENT, - GOLD_ACTION_CREATE); - if(!gold_request) - return rc; + GOLD_ACTION_MODIFY); + if (!gold_request) + goto fini; - gold_request_add_assignment(gold_request, "Machine", cluster_name); - snprintf(tmp_buff, sizeof(tmp_buff), "%d", (int)event_time); - gold_request_add_assignment(gold_request, "StartTime", tmp_buff); - gold_request_add_assignment(gold_request, "Name", node_ptr->name); - snprintf(tmp_buff, sizeof(tmp_buff), "%u", node_ptr->cpus); - gold_request_add_assignment(gold_request, "CPUCount", tmp_buff); - if(reason) - gold_request_add_assignment(gold_request, "Reason", reason); - else - gold_request_add_assignment(gold_request, "Reason", - node_ptr->reason); + gold_request_add_condition(gold_request, "Machine", + cluster_name, GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "EndTime", "0", + GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Name", + node_up_msg->hostlist, + GOLD_OPERATOR_NONE); + event_time = node_up_msg->event_time; + if (event_time) + event_time--; + snprintf(tmp_buff, sizeof(tmp_buff), "%u", (uint32_t)event_time); + gold_request_add_assignment(gold_request, "EndTime", + tmp_buff); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); - if(!gold_response) { - error("jobacct_p_cluster_procs: no response received"); - return rc; + if (!gold_response) { + error("agent_node_up: no response received"); + rc = EAGAIN; + goto fini; } - if(!gold_response->rc) - rc = SLURM_SUCCESS; - else { + if (gold_response->rc) { error("gold_response has non-zero rc(%d): %s", gold_response->rc, gold_response->message); + destroy_gold_response(gold_response); + goto fini; } destroy_gold_response(gold_response); + rc = SLURM_SUCCESS; + fini: gold_agent_free_node_up_msg(node_up_msg); return rc; } -extern int jobacct_p_node_up(struct node_record *node_ptr, time_t event_time) +extern int agent_node_down(Buf buffer) { int rc = SLURM_ERROR; gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; char tmp_buff[50]; + gold_node_down_msg_t *node_down_msg; + time_t event_time; -#if _DEBUG - slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); - info("Node_acct_up: %s at %s", 
node_ptr->name, tmp_buff); -#endif - /* FIXME: WRITE TO DATABASE HERE */ + if (gold_agent_unpack_node_down_msg(&node_down_msg, buffer) != + SLURM_SUCCESS) { + error("Failed to unpack GOLD_MSG_NODE_DOWN message"); + return SLURM_ERROR; + } + /* + * If the node was already down end that record since the + * reason will most likely be different + */ gold_request = create_gold_request(GOLD_OBJECT_EVENT, GOLD_ACTION_MODIFY); - if(!gold_request) - return rc; + if (!gold_request) + goto fini; - gold_request_add_condition(gold_request, "Machine", cluster_name, - GOLD_OPERATOR_NONE); + gold_request_add_condition(gold_request, "Machine", + cluster_name, GOLD_OPERATOR_NONE); gold_request_add_condition(gold_request, "EndTime", "0", GOLD_OPERATOR_NONE); - gold_request_add_condition(gold_request, "Name", node_ptr->name, + gold_request_add_condition(gold_request, "Name", + node_down_msg->hostlist, GOLD_OPERATOR_NONE); - - snprintf(tmp_buff, sizeof(tmp_buff), "%d", ((int)event_time - 1)); - gold_request_add_assignment(gold_request, "EndTime", tmp_buff); + event_time = node_down_msg->event_time; + if (event_time) + event_time--; + snprintf(tmp_buff, sizeof(tmp_buff), "%u", (uint32_t)event_time); + gold_request_add_assignment(gold_request, "EndTime", + tmp_buff); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); - if(!gold_response) { - error("jobacct_p_node_up: no response received"); - return rc; + if (!gold_response) { + error("jobacct_p_node_down: no response received"); + rc = EAGAIN; + goto fini; } - if(gold_response->rc) { + if (gold_response->rc) { error("gold_response has non-zero rc(%d): %s", gold_response->rc, gold_response->message); destroy_gold_response(gold_response); - return rc; + goto fini; } destroy_gold_response(gold_response); + /* now add the new one */ + gold_request = create_gold_request(GOLD_OBJECT_EVENT, + GOLD_ACTION_CREATE); + if (!gold_request) + goto fini; + + gold_request_add_assignment(gold_request, "Machine", cluster_name); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)node_down_msg->event_time); + gold_request_add_assignment(gold_request, "StartTime", tmp_buff); + gold_request_add_assignment(gold_request, "Name", + node_down_msg->hostlist); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", node_down_msg->cpus); + gold_request_add_assignment(gold_request, "CPUCount", tmp_buff); + gold_request_add_assignment(gold_request, "Reason", + node_down_msg->reason); + + gold_response = get_gold_response(gold_request); + destroy_gold_request(gold_request); + + if (!gold_response) { + error("jobacct_p_node_down: no response received"); + rc = EAGAIN; + goto fini; + } + + if (!gold_response->rc) + rc = SLURM_SUCCESS; + else { + error("gold_response has non-zero rc(%d): %s", + gold_response->rc, + gold_response->message); + } + destroy_gold_response(gold_response); + fini: gold_agent_free_node_down_msg(node_down_msg); return rc; } -extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) +extern int agent_cluster_procs(Buf buffer) { - static uint32_t last_procs = -1; + gold_cluster_procs_msg_t *cluster_procs_msg; gold_request_t *gold_request = NULL; gold_response_t *gold_response = NULL; char tmp_buff[50]; int rc = SLURM_ERROR; bool no_modify = 0; - if (procs == last_procs) { - debug3("we have the same procs as before no need to " - "query the database."); - return SLURM_SUCCESS; - } - last_procs = procs; + time_t event_time; - /* Record the processor count */ -#if _DEBUG - slurm_make_time_str(&event_time, tmp_buff, sizeof(tmp_buff)); 
- info("Node_acct_procs: %s has %u total CPUs at %s", - cluster_name, procs, tmp_buff); -#endif + if (gold_agent_unpack_cluster_procs_msg(&cluster_procs_msg, buffer) != + SLURM_SUCCESS) { + error("Failed to unpack GOLD_MSG_CLUSTER_PROCS message"); + return SLURM_ERROR; + } - /* get the last known one */ + /* get the last known processor count */ gold_request = create_gold_request(GOLD_OBJECT_EVENT, GOLD_ACTION_QUERY); - if(!gold_request) - return rc; - gold_request_add_condition(gold_request, "Machine", cluster_name, - GOLD_OPERATOR_NONE); + if (!gold_request) + goto fini; + gold_request_add_condition(gold_request, "Machine", + cluster_name, GOLD_OPERATOR_NONE); gold_request_add_condition(gold_request, "EndTime", "0", GOLD_OPERATOR_NONE); gold_request_add_condition(gold_request, "Name", "NULL", @@ -798,25 +1022,27 @@ extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); - if(!gold_response) { + if (!gold_response) { error("jobacct_p_cluster_procs: no response received"); - return rc; + rc = EAGAIN; + goto fini; } - if(gold_response->entry_cnt > 0) { + if (gold_response->entry_cnt > 0) { gold_response_entry_t *resp_entry = list_pop(gold_response->entries); gold_name_value_t *name_val = list_pop(resp_entry->name_val); - if(procs == atoi(name_val->value)) { + if (cluster_procs_msg->proc_count == atoi(name_val->value)) { debug("System hasn't changed since last entry"); destroy_gold_name_value(name_val); destroy_gold_response_entry(resp_entry); destroy_gold_response(gold_response); - return SLURM_SUCCESS; + rc = SLURM_SUCCESS; + goto fini; } else { - debug("System has changed from %s cpus to %d", - name_val->value, procs); + debug("System has changed from %s cpus to %u", + name_val->value, cluster_procs_msg->proc_count); } destroy_gold_name_value(name_val); @@ -829,64 +1055,71 @@ extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) destroy_gold_response(gold_response); - - if(no_modify) { + if (no_modify) { gold_request = create_gold_request(GOLD_OBJECT_EVENT, GOLD_ACTION_MODIFY); - if(!gold_request) - return rc; + if (!gold_request) + goto fini; gold_request_add_condition(gold_request, "Machine", - cluster_name, - GOLD_OPERATOR_NONE); + cluster_name, GOLD_OPERATOR_NONE); gold_request_add_condition(gold_request, "EndTime", "0", GOLD_OPERATOR_NONE); gold_request_add_condition(gold_request, "Name", "NULL", GOLD_OPERATOR_NONE); - - snprintf(tmp_buff, sizeof(tmp_buff), "%d", - ((int)event_time - 1)); - gold_request_add_assignment(gold_request, "EndTime", tmp_buff); + + event_time = cluster_procs_msg->event_time; + if (event_time) + event_time--; + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)event_time); + gold_request_add_assignment(gold_request, "EndTime", + tmp_buff); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); - if(!gold_response) { - error("jobacct_p_cluster_procs: no response received"); - return rc; + if (!gold_response) { + error("jobacct_p_cluster_procs: no response " + "received"); + rc = EAGAIN; + goto fini; } - if(gold_response->rc) { + if (gold_response->rc) { error("gold_response has non-zero rc(%d): %s", gold_response->rc, gold_response->message); destroy_gold_response(gold_response); - return rc; + goto fini; } destroy_gold_response(gold_response); } - /* now add the new one */ + /* now add the new processor count */ gold_request = create_gold_request(GOLD_OBJECT_EVENT, GOLD_ACTION_CREATE); - if(!gold_request) - 
return rc; + if (!gold_request) + goto fini; gold_request_add_assignment(gold_request, "Machine", cluster_name); - snprintf(tmp_buff, sizeof(tmp_buff), "%d", (int)event_time); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + (uint32_t)cluster_procs_msg->event_time); gold_request_add_assignment(gold_request, "StartTime", tmp_buff); - snprintf(tmp_buff, sizeof(tmp_buff), "%u", procs); + snprintf(tmp_buff, sizeof(tmp_buff), "%u", + cluster_procs_msg->proc_count); gold_request_add_assignment(gold_request, "CPUCount", tmp_buff); gold_response = get_gold_response(gold_request); destroy_gold_request(gold_request); - if(!gold_response) { + if (!gold_response) { error("jobacct_p_cluster_procs: no response received"); - return rc; + rc = EAGAIN; + goto fini; } - if(!gold_response->rc) + if (!gold_response->rc) rc = SLURM_SUCCESS; else { error("gold_response has non-zero rc(%d): %s", @@ -895,5 +1128,6 @@ extern int jobacct_p_cluster_procs(uint32_t procs, time_t event_time) } destroy_gold_response(gold_response); + fini: gold_agent_free_cluster_procs_msg(cluster_procs_msg); return rc; } diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index 38f1f5d8e..b2e5b8684 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -1366,6 +1366,15 @@ static void mvapich_state_destroy(mvapich_state_t *st) xfree(st); } +/* + * Create a unique MPIRUN_ID for jobid/stepid pairs. + * Combine the least significant bits of the jobid and stepid + */ +int mpirun_id_create(const mpi_plugin_client_info_t *job) +{ + return (int) ((job->jobid << 16) | (job->stepid & 0xffff)); +} + extern mvapich_state_t *mvapich_thr_create(const mpi_plugin_client_info_t *job, char ***env) { @@ -1405,12 +1414,12 @@ extern mvapich_state_t *mvapich_thr_create(const mpi_plugin_client_info_t *job, */ env_array_overwrite_fmt(env, "MPIRUN_PORT", "%hu", port); env_array_overwrite_fmt(env, "MPIRUN_NPROCS", "%d", st->nprocs); - env_array_overwrite_fmt(env, "MPIRUN_ID", "%d", st->job->jobid); + env_array_overwrite_fmt(env, "MPIRUN_ID", "%d", mpirun_id_create(job)); if (st->connect_once) { env_array_overwrite_fmt(env, "MPIRUN_CONNECT_ONCE", "1"); } - verbose ("mvapich-0.9.[45] master listening on port %d", port); + verbose ("mvapich-0.9.[45] master listening on port %hu", port); return st; } diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 085543cd2..e23eca54c 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1,7 +1,7 @@ /*****************************************************************************\ * select_bluegene.c - node selection plugin for Blue Gene system. * - * $Id: select_bluegene.c 13271 2008-02-14 20:02:00Z da $ + * $Id: select_bluegene.c 13423 2008-02-29 17:30:38Z da $ ***************************************************************************** * Copyright (C) 2004-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -1164,6 +1164,9 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) /* this means it is greater or equal to one bp */ if(tmp > 0) { + select_g_set_jobinfo(job_desc->select_jobinfo, + SELECT_DATA_NODE_CNT, + &job_desc->min_nodes); job_desc->min_nodes = tmp; job_desc->num_procs = procs_per_node * tmp; } else { @@ -1179,6 +1182,10 @@ extern int select_p_alter_node_cnt(enum select_node_cnt type, void *data) job_desc->min_nodes = bluegene_bp_node_cnt; + select_g_set_jobinfo(job_desc->select_jobinfo, + SELECT_DATA_NODE_CNT, + &job_desc->min_nodes); + tmp = bluegene_bp_node_cnt/job_desc->min_nodes; job_desc->num_procs = procs_per_node/tmp; diff --git a/src/sacct/options.c b/src/sacct/options.c index e00036b44..6ebb7c1b1 100644 --- a/src/sacct/options.c +++ b/src/sacct/options.c @@ -205,6 +205,10 @@ void _help_msg(void) "\n" "Options:\n" "\n" + "-A, --Account\n" + " Equivalent to \"--fields=jobid,jobname,start,end,cpu,\n" + " vsize_short,status,exitcode\". This option has no effect\n" + " if --dump is specified.\n" "-a, --all\n" " Display job accounting data for all users. By default, only\n" " data for the current user is displayed for users other than\n" @@ -277,6 +281,8 @@ void _help_msg(void) "-P --purge\n" " Used in conjunction with --expire to remove invalid data\n" " from the job accounting log.\n" + "-r --raw\n" + " don't format data leave in raw format\n" "-s <state-list>, --state=<state-list>\n" " Select jobs based on their current status: running (r),\n" " completed (cd), failed (f), timeout (to), and node_fail (nf).\n" @@ -317,6 +323,7 @@ void _init_params() params.opt_long = 0; /* --long */ params.opt_lowmem = 0; /* --low_memory */ params.opt_purge = 0; /* --purge */ + params.opt_raw = 0; /* --raw */ params.opt_total = 0; /* --total */ params.opt_uid = -1; /* --uid (-1=wildcard, 0=root) */ params.opt_verbose = 0; /* --verbose */ @@ -575,6 +582,7 @@ void parse_command_line(int argc, char **argv) static struct option long_options[] = { {"all", 0,0, 'a'}, + {"Account", 0,0, 'A'}, {"brief", 0, 0, 'b'}, {"duplicates", 0, ¶ms.opt_dup, 1}, {"dump", 0, 0, 'd'}, @@ -594,6 +602,7 @@ void parse_command_line(int argc, char **argv) {"noheader", 0, ¶ms.opt_header, 0}, {"partition", 1, 0, 'p'}, {"purge", 0, 0, 'P'}, + {"raw", 0, 0, 'r'}, {"state", 1, 0, 's'}, {"total", 0, 0, 't'}, {"uid", 1, 0, 'u'}, @@ -611,7 +620,7 @@ void parse_command_line(int argc, char **argv) opterr = 1; /* Let getopt report problems to the user */ while (1) { /* now cycle through the command line */ - c = getopt_long(argc, argv, "abde:F:f:g:hj:J:lOPp:s:StUu:Vv", + c = getopt_long(argc, argv, "aAbde:F:f:g:hj:J:lOPp:rs:StUu:Vv", long_options, &optionIndex); if (c == -1) break; @@ -619,6 +628,16 @@ void parse_command_line(int argc, char **argv) case 'a': params.opt_uid = -1; break; + case 'A': + params.opt_field_list = + xrealloc(params.opt_field_list, + (params.opt_field_list==NULL? 
0 : + sizeof(params.opt_field_list)) + + sizeof(ACCOUNT_FIELDS)+1); + strcat(params.opt_field_list, ACCOUNT_FIELDS); + strcat(params.opt_field_list, ","); + break; + case 'b': params.opt_field_list = xrealloc(params.opt_field_list, @@ -762,7 +781,9 @@ void parse_command_line(int argc, char **argv) strcat(params.opt_partition_list, optarg); strcat(params.opt_partition_list, ","); break; - + case 'r': + params.opt_raw = 1; + break; case 's': params.opt_state_list = xrealloc(params.opt_state_list, @@ -774,12 +795,15 @@ void parse_command_line(int argc, char **argv) break; case 'S': - if(!params.opt_field_list) { - params.opt_field_list = - xmalloc(sizeof(STAT_FIELDS)+1); - strcat(params.opt_field_list, STAT_FIELDS); - strcat(params.opt_field_list, ","); - } + params.opt_field_list = + xrealloc(params.opt_field_list, + (params.opt_field_list==NULL? 0 : + strlen(params.opt_field_list)) + + sizeof(STAT_FIELDS)+1); + + strcat(params.opt_field_list, STAT_FIELDS); + strcat(params.opt_field_list, ","); + params.opt_stat = 1; break; @@ -859,6 +883,7 @@ void parse_command_line(int argc, char **argv) "\topt_lowmem=%d\n" "\topt_partition_list=%s\n" "\topt_purge=%d\n" + "\topt_raw=%d\n" "\topt_state_list=%s\n" "\topt_total=%d\n" "\topt_uid=%d\n" @@ -877,6 +902,7 @@ void parse_command_line(int argc, char **argv) params.opt_lowmem, params.opt_partition_list, params.opt_purge, + params.opt_raw, params.opt_state_list, params.opt_total, params.opt_uid, diff --git a/src/sacct/print.c b/src/sacct/print.c index dd7172657..6a55de7a3 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -897,12 +897,20 @@ void print_start(type_t type, void *object) printf("%-14s", "--------------"); break; case JOB: + if(params.opt_raw) { + printf("%14d", (int)job->header.timestamp); + break; + } slurm_make_time_str(&job->header.timestamp, time_str, sizeof(time_str)); printf("%-14s", time_str); break; case JOBSTEP: + if(params.opt_raw) { + printf("%14d", (int)step->header.timestamp); + break; + } slurm_make_time_str(&step->header.timestamp, time_str, sizeof(time_str)); @@ -925,12 +933,20 @@ void print_end(type_t type, void *object) printf("%-14s", "--------------"); break; case JOB: + if(params.opt_raw) { + printf("%14d", (int)job->end); + break; + } slurm_make_time_str(&job->end, time_str, sizeof(time_str)); printf("%-14s", time_str); break; case JOBSTEP: + if(params.opt_raw) { + printf("%14d", (int)step->end); + break; + } slurm_make_time_str(&step->end, time_str, sizeof(time_str)); @@ -1101,6 +1117,46 @@ void print_vsize(type_t type, void *object) } } +void print_vsize_short(type_t type, void *object) +{ + job_rec_t *job = (job_rec_t *)object; + step_rec_t *step = (step_rec_t *)object; + char outbuf[FORMAT_STRING_SIZE]; + char buf1[FORMAT_STRING_SIZE]; + sacct_t sacct; + + switch(type) { + case HEADLINE: + printf("%10s", "MaxVSIZE"); + break; + case UNDERSCORE: + printf("%10s", "----------"); + break; + case JOB: + sacct = job->sacct; + if(params.opt_raw) { + printf("%10d", sacct.max_vsize); + break; + } + convert_num_unit((float)sacct.max_vsize, + buf1, sizeof(buf1),UNIT_NONE); + snprintf(outbuf, FORMAT_STRING_SIZE, "%s", buf1); + printf("%10s", outbuf); + break; + case JOBSTEP: + sacct = step->sacct; + if(params.opt_raw) { + printf("%10d", sacct.max_vsize); + break; + } + convert_num_unit((float)sacct.max_vsize, + buf1, sizeof(buf1),UNIT_NONE); + snprintf(outbuf, FORMAT_STRING_SIZE, "%s", buf1); + printf("%10s", outbuf); + break; + } +} + void print_cputime(type_t type, void *object) { job_rec_t *job = (job_rec_t 
*)object; diff --git a/src/sacct/sacct.c b/src/sacct/sacct.c index 02b2fdb58..680028464 100644 --- a/src/sacct/sacct.c +++ b/src/sacct/sacct.c @@ -195,6 +195,7 @@ fields_t fields[] = {{"account", print_account}, {"user", print_user}, {"usercpu", print_usercpu}, {"vsize", print_vsize}, + {"vsize_short", print_vsize_short}, {NULL, NULL}}; long input_error = 0; /* Muddle through bad data, but complain! */ diff --git a/src/sacct/sacct.h b/src/sacct/sacct.h index 056d42b07..aa36d0b8d 100644 --- a/src/sacct/sacct.h +++ b/src/sacct/sacct.h @@ -66,6 +66,7 @@ * which have no logical jobsteps. */ #define BATCH_JOB_TIMESTAMP 0 +#define ACCOUNT_FIELDS "jobid,jobname,start,end,cpu,vsize_short,status,exitcode" #define BRIEF_FIELDS "jobid,status,exitcode" #define DEFAULT_FIELDS "jobid,jobname,partition,ncpus,status,exitcode" #define STAT_FIELDS "jobid,vsize,rss,pages,cputime,ntasks,status" @@ -262,6 +263,7 @@ typedef struct sacct_parameters { int opt_long; /* --long */ int opt_lowmem; /* --low_memory */ int opt_purge; /* --purge */ + int opt_raw; /* --raw */ int opt_total; /* --total */ int opt_uid; /* --uid (-1=wildcard, 0=root) */ int opt_verbose; /* --verbose */ @@ -334,6 +336,7 @@ void print_uid(type_t type, void *object); void print_user(type_t type, void *object); void print_usercpu(type_t type, void *object); void print_vsize(type_t type, void *object); +void print_vsize_short(type_t type, void *object); void print_cputime(type_t type, void *object); void print_account(type_t type, void *object); diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 6833d2408..fc630c8c8 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -1,7 +1,8 @@ /*****************************************************************************\ * opt.c - options processing for salloc ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona1@llnl.gov>, et. al. * UCRL-CODE-226842. 
@@ -113,6 +114,8 @@ #define LONG_OPT_LINUX_IMAGE 0x121 #define LONG_OPT_MLOADER_IMAGE 0x122 #define LONG_OPT_RAMDISK_IMAGE 0x123 +#define LONG_OPT_NOSHELL 0x124 +#define LONG_OPT_GET_USER_ENV 0x125 /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -143,6 +146,7 @@ static void _opt_list(void); /* verify options sanity */ static bool _opt_verify(void); +static void _proc_get_user_env(char *optarg); static void _print_version(void); static void _process_env_var(env_vars_t *e, const char *val); static uint16_t _parse_mail_type(const char *arg); @@ -421,6 +425,7 @@ static void _opt_default() opt.gid = getgid(); + opt.cwd = NULL; opt.progname = NULL; opt.nprocs = 1; @@ -476,6 +481,9 @@ static void _opt_default() opt.egid = (gid_t) -1; opt.bell = BELL_AFTER_DELAY; + opt.no_shell = false; + opt.get_user_env_time = -1; + opt.get_user_env_mode = -1; } /*---[ env var processing ]-----------------------------------------------*/ @@ -660,6 +668,7 @@ void set_options(const int argc, char **argv) {"cpus-per-task", required_argument, 0, 'c'}, {"constraint", required_argument, 0, 'C'}, {"dependency", required_argument, 0, 'd'}, + {"chdir", required_argument, 0, 'D'}, {"nodefile", required_argument, 0, 'F'}, {"geometry", required_argument, 0, 'g'}, {"help", no_argument, 0, 'h'}, @@ -708,9 +717,11 @@ void set_options(const int argc, char **argv) {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, + {"no-shell", no_argument, 0, LONG_OPT_NOSHELL}, + {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, {NULL, 0, 0, 0} }; - char *opt_string = "+a:c:C:d:F:g:hHIJ:kK::n:N:Op:qR:st:uU:vVw:W:x:"; + char *opt_string = "+a:c:C:d:D:F:g:hHIJ:kK::n:N:Op:qR:st:uU:vVw:W:x:"; opt.progname = xbasename(argv[0]); optind = 0; @@ -735,6 +746,10 @@ void set_options(const int argc, char **argv) case 'd': opt.dependency = _get_int(optarg, "dependency"); break; + case 'D': + xfree(opt.cwd); + opt.cwd = xstrdup(optarg); + break; case 'F': xfree(opt.nodelist); tmp = slurm_read_hostfile(optarg, 0); @@ -976,6 +991,15 @@ void set_options(const int argc, char **argv) xfree(opt.ramdiskimage); opt.ramdiskimage = xstrdup(optarg); break; + case LONG_OPT_NOSHELL: + opt.no_shell = true; + break; + case LONG_OPT_GET_USER_ENV: + if (optarg) + _proc_get_user_env(optarg); + else + opt.get_user_env_time = 0; + break; default: fatal("Unrecognized command line parameter %c", opt_char); @@ -983,6 +1007,24 @@ void set_options(const int argc, char **argv) } } +static void _proc_get_user_env(char *optarg) +{ + char *end_ptr; + + if ((optarg[0] >= '0') && (optarg[0] <= '9')) + opt.get_user_env_time = strtol(optarg, &end_ptr, 10); + else { + opt.get_user_env_time = 0; + end_ptr = optarg; + } + + if ((end_ptr == NULL) || (end_ptr[0] == '\0')) + return; + if ((end_ptr[0] == 's') || (end_ptr[0] == 'S')) + opt.get_user_env_mode = 1; + else if ((end_ptr[0] == 'l') || (end_ptr[0] == 'L')) + opt.get_user_env_mode = 2; +} /* * _opt_args() : set options via commandline args and popt @@ -1028,7 +1070,7 @@ static bool _opt_verify(void) if ((opt.job_name == NULL) && (command_argc > 0)) opt.job_name = _base_name(command_argv[0]); - if (command_argc == 0) { + if ((opt.no_shell == false) && (command_argc == 0)) { error("A local command is a required parameter!"); verified = false; } @@ -1343,7 +1385,7 @@ static void _usage(void) printf( "Usage: salloc [-N numnodes|[min nodes]-[max nodes]] [-n num-processors]\n" " 
[[-c cpus-per-node] [-r n] [-p partition] [--hold] [-t minutes]\n" -" [--immediate] [--no-kill] [--overcommit]\n" +" [--immediate] [--no-kill] [--overcommit] [-D path]\n" " [--share] [-J jobname] [--jobid=id]\n" " [--verbose] [--gid=group] [--uid=user]\n" " [-W sec] [--minsockets=n] [--mincores=n] [--minthreads=n]\n" @@ -1372,6 +1414,7 @@ static void _help(void) " -p, --partition=partition partition requested\n" " -H, --hold submit job in held state\n" " -t, --time=minutes time limit\n" +" -D, --chdir=path change working directory\n" " -I, --immediate exit if resources are not immediately available\n" " -k, --no-kill do not kill job on node failure\n" " -K, --kill-command[=signal] signal to send terminating job\n" @@ -1394,6 +1437,7 @@ static void _help(void) " --no-bell do NOT ring the terminal bell\n" " --gid=group_id group ID to run job as (user root only)\n" " --uid=user_id user ID to run job as (user root only)\n" +" --get-user-env used by Moab. See srun man page.\n" "\n" "Constraint options:\n" " --mincpus=n minimum number of cpus per node\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 62d27b6ae..b66a318e4 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -1,8 +1,8 @@ /*****************************************************************************\ * opt.h - definitions for salloc option processing - * $Id: opt.h 12536 2007-10-22 23:57:53Z jette $ ***************************************************************************** - * Copyright (C) 2002-2006 The Regents of the University of California. + * Copyright (C) 2002-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Mark Grondona <grondona1@llnl.gov>, * Christopher J. Morrone <morrone2@llnl.gov>, et. al. @@ -113,6 +113,10 @@ typedef struct salloc_options { uint16_t mail_type; /* --mail-type */ char *mail_user; /* --mail-user */ bell_flag_t bell; /* --bell, --no-bell */ + bool no_shell; /* --no-shell */ + int get_user_env_time; /* --get-user-env[=secs] */ + int get_user_env_mode; /* --get-user-env=[S|L] */ + char *cwd; /* current working directory */ } opt_t; extern opt_t opt; diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 66981c6a1..d78f9d900 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -1,10 +1,9 @@ /*****************************************************************************\ * salloc.c - Request a SLURM job allocation and * launch a user-specified command. - * - * $Id: salloc.c 12700 2007-11-27 23:39:24Z jette $ ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Christopher J. Morrone <morrone2@llnl.gov> * UCRL-CODE-226842. 
@@ -31,13 +30,14 @@ # include "config.h" #endif +#include <pwd.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> -#include <stdbool.h> -#include <unistd.h> #include <sys/types.h> #include <sys/wait.h> #include <time.h> +#include <unistd.h> #include <slurm/slurm.h> @@ -99,6 +99,25 @@ int main(int argc, char *argv[]) log_alter(logopt, 0, NULL); } + if (opt.cwd && chdir(opt.cwd)) { + error("chdir(%s): %m", opt.cwd); + exit(1); + } + + if (opt.get_user_env_time >= 0) { + struct passwd *pw; + pw = getpwuid(opt.uid); + if (pw == NULL) { + error("getpwuid(%u): %m", (uint32_t)opt.uid); + exit(1); + } + env = env_array_user_default(pw->pw_name, + opt.get_user_env_time, + opt.get_user_env_mode); + if (env == NULL) + exit(1); /* error already logged */ + } + /* * Request a job allocation */ @@ -163,6 +182,8 @@ int main(int argc, char *argv[]) && ((after - before) > DEFAULT_BELL_DELAY))) { ring_terminal_bell(); } + if (opt.no_shell) + exit(0); if (allocation_interrupted) { /* salloc process received a signal after * slurm_allocate_resources_blocking returned with the diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index d94a24661..314cc2d2d 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -106,6 +106,7 @@ #define LONG_OPT_NO_REQUEUE 0x116 #define LONG_OPT_COMMENT 0x117 #define LONG_OPT_WRAP 0x118 +#define LONG_OPT_REQUEUE 0x119 #define LONG_OPT_BLRTS_IMAGE 0x140 #define LONG_OPT_LINUX_IMAGE 0x141 #define LONG_OPT_MLOADER_IMAGE 0x142 @@ -654,6 +655,7 @@ static struct option long_options[] = { {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE}, {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER}, {"nice", optional_argument, 0, LONG_OPT_NICE}, + {"requeue", no_argument, 0, LONG_OPT_REQUEUE}, {"no-requeue", no_argument, 0, LONG_OPT_NO_REQUEUE}, {"comment", required_argument, 0, LONG_OPT_COMMENT}, {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, @@ -1275,6 +1277,9 @@ static void _set_options(int argc, char **argv) case LONG_OPT_NO_REQUEUE: opt.no_requeue = true; break; + case LONG_OPT_REQUEUE: + opt.no_requeue = false; /* the default */ + break; case LONG_OPT_COMMENT: xfree(opt.comment); opt.comment = xstrdup(optarg); diff --git a/src/sbcast/agent.c b/src/sbcast/agent.c index a5ed1c0bc..00bfad469 100644 --- a/src/sbcast/agent.c +++ b/src/sbcast/agent.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * agent.c - File transfer agent (handles message traffic) - * - * $Id$ ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. 
@@ -87,7 +86,7 @@ static void *_agent_thread(void *args) ret_list = slurm_send_recv_msgs(thread_ptr->nodelist, &thread_ptr->msg, - 0); + params.timeout); if (ret_list == NULL) { error("slurm_send_recv_msgs: %m"); exit(1); @@ -123,10 +122,10 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, { /* Preserve some data structures across calls for better performance */ static int threads_used = 0; + static thd_t thread_info[MAX_THREADS]; int i, fanout, rc = SLURM_SUCCESS; int retries = 0; - thd_t thread_info[MAX_THREADS]; pthread_attr_t attr; if (threads_used == 0) { @@ -181,7 +180,7 @@ extern void send_rpc(file_bcast_msg_t *bcast_msg, if (pthread_attr_setstacksize(&attr, 3 * 1024*1024)) error("pthread_attr_setstacksize: %m"); if (pthread_attr_setdetachstate (&attr, - PTHREAD_CREATE_JOINABLE)) + PTHREAD_CREATE_DETACHED)) error("pthread_attr_setdetachstate error %m"); for (i=0; i<threads_used; i++) { diff --git a/src/sbcast/opts.c b/src/sbcast/opts.c index 30613bcbf..d23c068b0 100644 --- a/src/sbcast/opts.c +++ b/src/sbcast/opts.c @@ -1,7 +1,8 @@ /****************************************************************************\ * opts.c - sbcast command line option processing functions ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. @@ -82,6 +83,7 @@ extern void parse_command_line(int argc, char *argv[]) {"force", no_argument, 0, 'f'}, {"preserve", no_argument, 0, 'p'}, {"size", required_argument, 0, 's'}, + {"timeout", required_argument, 0, 't'}, {"verbose", no_argument, 0, 'v'}, {"version", no_argument, 0, 'V'}, {"help", no_argument, 0, OPT_LONG_HELP}, @@ -99,10 +101,11 @@ extern void parse_command_line(int argc, char *argv[]) params.preserve = true; if ( ( env_val = getenv("SBCAST_SIZE") ) ) params.block_size = _map_size(env_val); - + if ( ( env_val = getenv("SBCAST_TIMEOUT") ) ) + params.timeout = (atoi(env_val) * 1000); optind = 0; - while((opt_char = getopt_long(argc, argv, "CfF:ps:vV", + while((opt_char = getopt_long(argc, argv, "CfF:ps:t:vV", long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': @@ -125,6 +128,9 @@ extern void parse_command_line(int argc, char *argv[]) case (int) 's': params.block_size = _map_size(optarg); break; + case (int)'t': + params.timeout = (atoi(optarg) * 1000); + break; case (int) 'v': params.verbose++; break; @@ -192,6 +198,7 @@ static void _print_options( void ) info("force = %s", params.force ? "true" : "false"); info("fanout = %d", params.fanout); info("preserve = %s", params.preserve ? 
"true" : "false"); + info("timeout = %d", params.timeout); info("verbose = %d", params.verbose); info("source = %s", params.src_fname); info("dest = %s", params.dst_fname); @@ -218,6 +225,7 @@ Usage: sbcast [OPTIONS] SOURCE DEST\n\ -F, --fanout=num specify message fanout\n\ -p, --preserve preserve modes and times of source file\n\ -s, --size=num block size in bytes (rounded off)\n\ + -t, --timeout=secs specify message timeout (seconds)\n\ -v, --verbose provide detailed event logging\n\ -V, --version print version information and exit\n\ \nHelp options:\n\ diff --git a/src/sbcast/sbcast.c b/src/sbcast/sbcast.c index c3cb7c3ff..636641107 100644 --- a/src/sbcast/sbcast.c +++ b/src/sbcast/sbcast.c @@ -1,9 +1,8 @@ /*****************************************************************************\ * sbcast.c - Broadcast a file to allocated nodes - * - * $Id: sbcast.c 6965 2006-01-04 23:31:07Z jette $ ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. @@ -51,13 +50,13 @@ #include <sys/types.h> #include <sys/stat.h> +#include "src/common/forward.h" #include "src/common/hostlist.h" #include "src/common/log.h" #include "src/common/slurm_protocol_api.h" #include "src/common/slurm_protocol_interface.h" #include "src/common/xmalloc.h" #include "src/common/xstring.h" -#include "src/common/forward.h" #include "src/sbcast/sbcast.h" /* global variables */ @@ -220,8 +219,11 @@ static void _bcast_file(void) bcast_msg.last_block = 1; size_block += bcast_msg.block_len[i]; if (params.block_size - && (size_block >= params.block_size)) + && (size_block >= params.block_size)) { + for (i++; i<FILE_BLOCKS; i++) + bcast_msg.block_len[i] = 0; break; + } } send_rpc(&bcast_msg, alloc_resp); if (bcast_msg.last_block) diff --git a/src/sbcast/sbcast.h b/src/sbcast/sbcast.h index 9d31ff281..9a14292be 100644 --- a/src/sbcast/sbcast.h +++ b/src/sbcast/sbcast.h @@ -1,9 +1,8 @@ /****************************************************************************\ * sbcast.h - definitions used for sbcast data functions - * - * $Id: sbcast.h 6965 2006-01-04 23:31:07Z jette $ ***************************************************************************** - * Copyright (C) 2006 The Regents of the University of California. + * Copyright (C) 2006-2007 The Regents of the University of California. + * Copyright (C) 2008 Lawrence Livermore National Security. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). * Written by Morris Jette <jette1@llnl.gov> * UCRL-CODE-226842. 
@@ -54,6 +53,7 @@ struct sbcast_parameters { int fanout; bool force; bool preserve; + int timeout; int verbose; char *src_fname; char *dst_fname; diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index 6e337ab76..281c27b1a 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -209,6 +209,10 @@ scontrol_update_job (int argc, char *argv[]) job_msg.job_id = (uint32_t) strtol(&argv[i][6], (char **) NULL, 10); + else if (strncasecmp(argv[i], "Comment=", 8) == 0) { + job_msg.comment = &argv[i][8]; + update_cnt++; + } else if (strncasecmp(argv[i], "TimeLimit=", 10) == 0) { if ((strcasecmp(&argv[i][10], "UNLIMITED") == 0) || (strcasecmp(&argv[i][10], "INFINITE") == 0)) diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index 58d26e45f..0d6bce56b 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * controller.c - main control machine daemon for slurm - * $Id: controller.c 13156 2008-02-01 17:43:01Z da $ + * $Id: controller.c 13506 2008-03-07 00:13:15Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -429,6 +429,8 @@ int main(int argc, char *argv[]) verbose("Unable to remove pidfile '%s': %m", slurmctld_conf.slurmctld_pidfile); + jobacct_g_fini_slurmctld(); /* Save pending message traffic */ + #ifdef MEMORY_LEAK_DEBUG { /* This should purge all allocated memory, *\ @@ -456,7 +458,6 @@ int main(int argc, char *argv[]) /* Plugins are needed to purge job/node data structures, * unplug after other data structures are purged */ g_slurm_jobcomp_fini(); - jobacct_g_fini_slurmctld(); slurm_sched_fini(); slurm_select_fini(); checkpoint_fini(); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 9397d9e10..165c9276d 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -3,7 +3,7 @@ * Note: there is a global job list (job_list), time stamp * (last_job_update), and hash table (job_hash) * - * $Id: job_mgr.c 13373 2008-02-27 16:47:13Z jette $ + * $Id: job_mgr.c 13533 2008-03-10 16:11:30Z jette $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -1038,7 +1038,7 @@ extern int kill_running_job_by_node_name(char *node_name, bool step_test) job_ptr->job_id); if (job_ptr->node_cnt == 0) { job_ptr->job_state &= (~JOB_COMPLETING); - delete_step_records(job_ptr, 1); + delete_step_records(job_ptr, 0); slurm_sched_schedule(); } if (node_ptr->comp_job_cnt) @@ -3672,6 +3672,9 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) info("update_job: setting features to %s for " "job_id %u", job_specs->features, job_specs->job_id); + } else { + info("update_job: cleared features for job %u", + job_specs->job_id); } } else { error("Attempt to change features for job %u", @@ -3680,6 +3683,14 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) } } + if (job_specs->comment) { + xfree(job_ptr->comment); + job_ptr->comment = job_specs->comment; + job_specs->comment = NULL; /* Nothing left to free */ + info("update_job: setting comment to %s for job_id %u", + job_ptr->comment, job_specs->job_id); + } + if (job_specs->name) { strncpy(job_ptr->name, job_specs->name, MAX_JOBNAME_LEN); info("update_job: setting name to %s for job_id %u", @@ -3767,10 +3778,13 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (job_specs->account) { xfree(job_ptr->account); if (job_specs->account[0] != '\0') { - job_ptr->account = job_specs->account ; + job_ptr->account = job_specs->account; + job_specs->account = NULL; /* Nothing left to free */ info("update_job: setting account to %s for job_id %u", job_ptr->account, job_specs->job_id); - job_specs->account = NULL; + } else { + info("update_job: cleared account for job_id %u", + job_specs->job_id); } } diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 6a7457334..bd17952d4 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -206,6 +206,8 @@ int schedule(void) #endif static bool wiki_sched = false; static bool wiki_sched_test = false; + time_t now = time(NULL); + DEF_TIMERS; START_TIMER; @@ -271,7 +273,7 @@ int schedule(void) #endif } else if (error_code == SLURM_SUCCESS) { /* job initiated */ - last_job_update = time(NULL); + last_job_update = now; #ifdef HAVE_BG select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_IONODES, @@ -300,13 +302,15 @@ int schedule(void) info("schedule: JobId=%u non-runnable: %s", job_ptr->job_id, slurm_strerror(error_code)); - last_job_update = time(NULL); - job_ptr->job_state = JOB_FAILED; - job_ptr->exit_code = 1; - job_ptr->state_reason = FAIL_BAD_CONSTRAINTS; - job_ptr->start_time = job_ptr->end_time = time(NULL); - job_completion_logger(job_ptr); - delete_job_details(job_ptr); + if (!wiki_sched) { + last_job_update = now; + job_ptr->job_state = JOB_FAILED; + job_ptr->exit_code = 1; + job_ptr->state_reason = FAIL_BAD_CONSTRAINTS; + job_ptr->start_time = job_ptr->end_time = now; + job_completion_logger(job_ptr); + delete_job_details(job_ptr); + } } } diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 5f07cff6f..403095cab 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -4,7 +4,7 @@ * hash table (node_hash_table), time stamp (last_node_update) and * configuration list (config_list) * - * $Id: node_mgr.c 13274 2008-02-14 21:32:07Z jette $ + * $Id: node_mgr.c 13552 2008-03-11 17:34:27Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -1524,7 +1524,8 @@ validate_node_specs (char *node_name, uint16_t cpus, node_flags; node_ptr->last_idle = now; } - xfree(node_ptr->reason); + if ((node_flags & NODE_STATE_DRAIN) == 0) + xfree(node_ptr->reason); jobacct_g_node_up(node_ptr, now); } else if ((base_state == NODE_STATE_DOWN) && (slurmctld_conf.ret2service == 1) && @@ -2234,6 +2235,7 @@ void make_node_idle(struct node_record *node_ptr, "%ld seconds", job_ptr->job_id, (long) delay); job_ptr->job_state &= (~JOB_COMPLETING); + delete_step_records(job_ptr, 0); slurm_sched_schedule(); } } else { diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index b8903bc62..b92527b9a 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -2,7 +2,7 @@ * node_scheduler.c - select and allocated nodes to jobs * Note: there is a global node table (node_record_table_ptr) * - * $Id: node_scheduler.c 13234 2008-02-08 22:13:39Z jette $ + * $Id: node_scheduler.c 13639 2008-03-18 19:25:32Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -223,7 +223,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, if ((agent_args->node_count - down_node_cnt) == 0) { job_ptr->job_state &= (~JOB_COMPLETING); - delete_step_records(job_ptr, 1); + delete_step_records(job_ptr, 0); slurm_sched_schedule(); } @@ -308,6 +308,10 @@ _pick_best_load(struct job_record *job_ptr, bitstr_t * bitmap, ((req_nodes > min_nodes) && (set_cnt < req_nodes))) return error_code; /* not usable */ + if (job_ptr->details && job_ptr->details->req_node_bitmap && + (!bit_super_set(job_ptr->details->req_node_bitmap, bitmap))) + return error_code; /* required nodes not available */ + basemap = bit_copy(bitmap); if (basemap == NULL) fatal("bit_copy malloc failure"); @@ -1710,6 +1714,7 @@ extern void re_kill_job(struct job_record *job_ptr) if ((--job_ptr->node_cnt) == 0) { last_node_update = time(NULL); job_ptr->job_state &= (~JOB_COMPLETING); + delete_step_records(job_ptr, 0); slurm_sched_schedule(); } continue; diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c index 2da3cfdc5..104f6d9f7 100644 --- a/src/slurmctld/step_mgr.c +++ b/src/slurmctld/step_mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * step_mgr.c - manage the job step information of slurm - * $Id: step_mgr.c 13155 2008-02-01 17:30:43Z jette $ + * $Id: step_mgr.c 13414 2008-02-28 23:22:33Z da $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -1252,9 +1252,10 @@ extern int step_partial_comp(step_complete_msg_t *req, int *rem, if (step_ptr->batch_step) { if(rem) *rem = 0; - step_ptr->exit_code = 0; + step_ptr->exit_code = req->step_rc; if (max_rc) *max_rc = step_ptr->exit_code; + jobacct_g_aggregate(step_ptr->jobacct, req->jobacct); /* we don't want to delete the step record here since right after we delete this step again if we delete it here we won't find it when we try the second diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 0fc820149..494dc360b 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * src/slurmd/slurmstepd/mgr.c - job manager functions for slurmstepd - * $Id: mgr.c 13322 2008-02-21 19:06:27Z da $ + * $Id: mgr.c 13414 2008-02-28 23:22:33Z da $ ***************************************************************************** * Copyright (C) 2002-2007 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -173,6 +173,7 @@ static void _slurmd_job_log_init(slurmd_job_t *job); static void _wait_for_io(slurmd_job_t *job); static int _send_exit_msg(slurmd_job_t *job, uint32_t *tid, int n, int status); +static void _wait_for_children_slurmstepd(slurmd_job_t *job); static int _send_pending_exit_msgs(slurmd_job_t *job); static void _send_step_complete_msgs(slurmd_job_t *job); static void _wait_for_all_tasks(slurmd_job_t *job); @@ -229,7 +230,10 @@ mgr_launch_tasks_setup(launch_tasks_request_msg_t *msg, slurm_addr *cli, static void _batch_finish(slurmd_job_t *job, int rc) { - int status = job->task[0]->estatus; + int i; + for (i = 0; i < job->ntasks; i++) + step_complete.step_rc = MAX(step_complete.step_rc, + WEXITSTATUS(job->task[i]->estatus)); if (job->argv[0] && (unlink(job->argv[0]) < 0)) error("unlink(%s): %m", job->argv[0]); @@ -238,11 +242,12 @@ _batch_finish(slurmd_job_t *job, int rc) xfree(job->batchdir); if ((job->stepid == NO_VAL) || (job->stepid == SLURM_BATCH_SCRIPT)) { verbose("job %u completed with slurm_rc = %d, job_rc = %d", - job->jobid, rc, status); - _send_complete_batch_script_msg(job, rc, status); + job->jobid, rc, step_complete.step_rc); + _send_complete_batch_script_msg(job, rc, job->task[0]->estatus); } else { + _wait_for_children_slurmstepd(job); verbose("job %u.%u completed with slurm_rc = %d, job_rc = %d", - job->jobid, job->stepid, rc, status); + job->jobid, job->stepid, rc, step_complete.step_rc); _send_step_complete_msgs(job); } } diff --git a/src/srun/opt.c b/src/srun/opt.c index 4802c930b..d766ce74c 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.c - options processing for srun - * $Id: opt.c 13270 2008-02-14 19:40:44Z da $ + * $Id: opt.c 13623 2008-03-17 16:46:23Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 
@@ -143,6 +143,7 @@ #define LONG_OPT_MULTI 0x122 #define LONG_OPT_NO_REQUEUE 0x123 #define LONG_OPT_COMMENT 0x124 +#define LONG_OPT_REQUEUE 0x125 #define LONG_OPT_SOCKETSPERNODE 0x130 #define LONG_OPT_CORESPERSOCKET 0x131 #define LONG_OPT_THREADSPERCORE 0x132 @@ -1408,6 +1409,7 @@ void set_options(const int argc, char **argv, int first) {"ctrl-comm-ifhn", required_argument, 0, LONG_OPT_CTRL_COMM_IFHN}, {"multi-prog", no_argument, 0, LONG_OPT_MULTI}, {"no-requeue", no_argument, 0, LONG_OPT_NO_REQUEUE}, + {"requeue", no_argument, 0, LONG_OPT_REQUEUE}, {"comment", required_argument, 0, LONG_OPT_COMMENT}, {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, @@ -1941,6 +1943,9 @@ void set_options(const int argc, char **argv, int first) case LONG_OPT_NO_REQUEUE: opt.no_requeue = true; break; + case LONG_OPT_REQUEUE: + opt.no_requeue = false; /* the default */ + break; case LONG_OPT_COMMENT: if(!first && opt.comment) break; diff --git a/src/srun/opt.h b/src/srun/opt.h index e7e67e2b7..8104cb48c 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -1,6 +1,6 @@ /*****************************************************************************\ * opt.h - definitions for srun option processing - * $Id: opt.h 12697 2007-11-27 22:02:29Z jette $ + * $Id: opt.h 13407 2008-02-28 20:13:43Z jette $ ***************************************************************************** * Copyright (C) 2002-2006 The Regents of the University of California. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). @@ -156,7 +156,7 @@ typedef struct srun_options { bool labelio; /* --label-output, -l */ bool unbuffered; /* --unbuffered, -u */ bool allocate; /* --allocate, -A */ - bool noshell; /* --noshell */ + bool noshell; /* --no-shell */ bool overcommit; /* --overcommit, -O */ bool batch; /* --batch, -b */ bool no_kill; /* --no-kill, -k */ -- GitLab
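The slurmstepd mgr.c and step_mgr.c hunks above change how a batch step's exit code is derived: rather than always reporting 0, the step now carries the largest exit status seen across its tasks, and the controller copies that value into the step record via req->step_rc. A minimal standalone sketch of that aggregation pattern follows; it is not SLURM source, and the helper name, the status array, and the demo harness are illustrative assumptions.

#include <stdio.h>
#include <sys/wait.h>

#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/* Return the largest 8-bit exit code among ntasks raw wait()-style
 * statuses, mirroring the per-task MAX(...) loop in the hunk above. */
static int aggregate_step_rc(const int *estatus, int ntasks)
{
        int i, step_rc = 0;

        for (i = 0; i < ntasks; i++)
                step_rc = MAX(step_rc, WEXITSTATUS(estatus[i]));
        return step_rc;
}

int main(void)
{
        /* hypothetical raw statuses: tasks exited with 0, 0 and 2 (2 << 8) */
        int statuses[] = { 0, 0, 2 << 8 };

        printf("step_rc = %d\n", aggregate_step_rc(statuses, 3));
        return 0;
}

One failing task is therefore enough to mark the whole batch step, and its script, as failed in accounting.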
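The srun opt.c hunk registers --requeue as a long-only option: it is given a private value above the single-character range (LONG_OPT_REQUEUE, 0x125) so getopt_long() returns that value directly and the existing switch statement can dispatch it, where it simply resets opt.no_requeue to the default. A self-contained sketch of that getopt_long pattern follows; it is not SLURM source, and the main() harness and variable names are illustrative assumptions.

#define _GNU_SOURCE
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>

/* Private values above 0xff can never collide with a returned
 * short-option character (values taken from the hunk above). */
#define LONG_OPT_NO_REQUEUE 0x123
#define LONG_OPT_REQUEUE    0x125

int main(int argc, char **argv)
{
        bool no_requeue = false;        /* stand-in for opt.no_requeue */
        static struct option long_options[] = {
                {"no-requeue", no_argument, 0, LONG_OPT_NO_REQUEUE},
                {"requeue",    no_argument, 0, LONG_OPT_REQUEUE},
                {0, 0, 0, 0}
        };
        int c;

        while ((c = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
                switch (c) {
                case LONG_OPT_NO_REQUEUE:
                        no_requeue = true;
                        break;
                case LONG_OPT_REQUEUE:
                        no_requeue = false;     /* restore the default */
                        break;
                default:
                        break;
                }
        }
        printf("no_requeue = %s\n", no_requeue ? "true" : "false");
        return 0;
}

As the NEWS entry notes, --requeue has no effect in slurm v1.2; registering it here only keeps the option legal so v1.3-style command lines do not fail to parse.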